• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
//
// Auto-generated file. Do not edit!
//   Specification: test/f32-velu.yaml
//   Generator: tools/generate-vunary-test.py
9 
10 
11 #include <gtest/gtest.h>
12 
13 #include <xnnpack/common.h>
14 #include <xnnpack/isa-checks.h>
15 
16 #include <xnnpack/vunary.h>
17 #include "vunary-microkernel-tester.h"
18 
19 
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // Tests for xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x4. Each test hands the
  // kernel and xnn_init_f32_elu_neon_rr2_lut16_p3_params to the tester's
  // Test() call. Every test starts with TEST_REQUIRES_ARM_NEON (presumably a
  // NEON availability check -- confirm in xnnpack/isa-checks.h).

  // Single run with batch size 4.
  TEST(F32_VELU__NEON_RR2_LUT16_P3_X4, batch_eq_4) {
    TEST_REQUIRES_ARM_NEON;
    VUnaryMicrokernelTester()
      .batch_size(4)
      .Test(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x4, xnn_init_f32_elu_neon_rr2_lut16_p3_params);
  }

  // Batch sizes 8, 12, ..., 36 (multiples of 4).
  TEST(F32_VELU__NEON_RR2_LUT16_P3_X4, batch_div_4) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 8; batch_size < 40; batch_size += 4) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x4, xnn_init_f32_elu_neon_rr2_lut16_p3_params);
    }
  }

  // Batch sizes 1 through 3.
  TEST(F32_VELU__NEON_RR2_LUT16_P3_X4, batch_lt_4) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size < 4; batch_size++) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x4, xnn_init_f32_elu_neon_rr2_lut16_p3_params);
    }
  }

  // Batch sizes 5 through 7.
  TEST(F32_VELU__NEON_RR2_LUT16_P3_X4, batch_gt_4) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 5; batch_size < 8; batch_size++) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x4, xnn_init_f32_elu_neon_rr2_lut16_p3_params);
    }
  }

  // inplace(true) with batch sizes 1, 4, ..., 19.
  TEST(F32_VELU__NEON_RR2_LUT16_P3_X4, inplace) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .inplace(true)
        .Test(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x4, xnn_init_f32_elu_neon_rr2_lut16_p3_params);
    }
  }

  // prescale values 0.1 and 10.0, each with batch sizes 1, 4, ..., 19.
  TEST(F32_VELU__NEON_RR2_LUT16_P3_X4, prescale) {
    TEST_REQUIRES_ARM_NEON;
    for (float prescale : std::vector<float>({0.1f, 10.0f})) {
      for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
        VUnaryMicrokernelTester()
          .batch_size(batch_size)
          .prescale(prescale)
          .Test(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x4, xnn_init_f32_elu_neon_rr2_lut16_p3_params);
      }
    }
  }

  // alpha values 0.3 and 3.0, each with batch sizes 1, 4, ..., 19.
  TEST(F32_VELU__NEON_RR2_LUT16_P3_X4, alpha) {
    TEST_REQUIRES_ARM_NEON;
    for (float alpha : std::vector<float>({0.3f, 3.0f})) {
      for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
        VUnaryMicrokernelTester()
          .batch_size(batch_size)
          .alpha(alpha)
          .Test(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x4, xnn_init_f32_elu_neon_rr2_lut16_p3_params);
      }
    }
  }

  // beta values 0.3 and 3.0, each with batch sizes 1, 4, ..., 19.
  TEST(F32_VELU__NEON_RR2_LUT16_P3_X4, beta) {
    TEST_REQUIRES_ARM_NEON;
    for (float beta : std::vector<float>({0.3f, 3.0f})) {
      for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
        VUnaryMicrokernelTester()
          .batch_size(batch_size)
          .beta(beta)
          .Test(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x4, xnn_init_f32_elu_neon_rr2_lut16_p3_params);
      }
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
101 
102 
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // Tests for xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x8. Each test hands the
  // kernel and xnn_init_f32_elu_neon_rr2_lut16_p3_params to the tester's
  // Test() call. Every test starts with TEST_REQUIRES_ARM_NEON (presumably a
  // NEON availability check -- confirm in xnnpack/isa-checks.h).

  // Single run with batch size 8.
  TEST(F32_VELU__NEON_RR2_LUT16_P3_X8, batch_eq_8) {
    TEST_REQUIRES_ARM_NEON;
    VUnaryMicrokernelTester()
      .batch_size(8)
      .Test(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x8, xnn_init_f32_elu_neon_rr2_lut16_p3_params);
  }

  // Batch sizes 16, 24, ..., 72 (multiples of 8).
  TEST(F32_VELU__NEON_RR2_LUT16_P3_X8, batch_div_8) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 16; batch_size < 80; batch_size += 8) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x8, xnn_init_f32_elu_neon_rr2_lut16_p3_params);
    }
  }

  // Batch sizes 1 through 7.
  TEST(F32_VELU__NEON_RR2_LUT16_P3_X8, batch_lt_8) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size < 8; batch_size++) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x8, xnn_init_f32_elu_neon_rr2_lut16_p3_params);
    }
  }

  // Batch sizes 9 through 15.
  TEST(F32_VELU__NEON_RR2_LUT16_P3_X8, batch_gt_8) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 9; batch_size < 16; batch_size++) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x8, xnn_init_f32_elu_neon_rr2_lut16_p3_params);
    }
  }

  // inplace(true) with batch sizes 1, 8, ..., 36.
  TEST(F32_VELU__NEON_RR2_LUT16_P3_X8, inplace) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .inplace(true)
        .Test(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x8, xnn_init_f32_elu_neon_rr2_lut16_p3_params);
    }
  }

  // prescale values 0.1 and 10.0, each with batch sizes 1, 8, ..., 36.
  TEST(F32_VELU__NEON_RR2_LUT16_P3_X8, prescale) {
    TEST_REQUIRES_ARM_NEON;
    for (float prescale : std::vector<float>({0.1f, 10.0f})) {
      for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
        VUnaryMicrokernelTester()
          .batch_size(batch_size)
          .prescale(prescale)
          .Test(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x8, xnn_init_f32_elu_neon_rr2_lut16_p3_params);
      }
    }
  }

  // alpha values 0.3 and 3.0, each with batch sizes 1, 8, ..., 36.
  TEST(F32_VELU__NEON_RR2_LUT16_P3_X8, alpha) {
    TEST_REQUIRES_ARM_NEON;
    for (float alpha : std::vector<float>({0.3f, 3.0f})) {
      for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
        VUnaryMicrokernelTester()
          .batch_size(batch_size)
          .alpha(alpha)
          .Test(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x8, xnn_init_f32_elu_neon_rr2_lut16_p3_params);
      }
    }
  }

  // beta values 0.3 and 3.0, each with batch sizes 1, 8, ..., 36.
  TEST(F32_VELU__NEON_RR2_LUT16_P3_X8, beta) {
    TEST_REQUIRES_ARM_NEON;
    for (float beta : std::vector<float>({0.3f, 3.0f})) {
      for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
        VUnaryMicrokernelTester()
          .batch_size(batch_size)
          .beta(beta)
          .Test(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x8, xnn_init_f32_elu_neon_rr2_lut16_p3_params);
      }
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
184 
185 
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // Tests for xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x12. Each test hands the
  // kernel and xnn_init_f32_elu_neon_rr2_lut16_p3_params to the tester's
  // Test() call. Every test starts with TEST_REQUIRES_ARM_NEON (presumably a
  // NEON availability check -- confirm in xnnpack/isa-checks.h).

  // Single run with batch size 12.
  TEST(F32_VELU__NEON_RR2_LUT16_P3_X12, batch_eq_12) {
    TEST_REQUIRES_ARM_NEON;
    VUnaryMicrokernelTester()
      .batch_size(12)
      .Test(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x12, xnn_init_f32_elu_neon_rr2_lut16_p3_params);
  }

  // Batch sizes 24, 36, ..., 108 (multiples of 12).
  TEST(F32_VELU__NEON_RR2_LUT16_P3_X12, batch_div_12) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 24; batch_size < 120; batch_size += 12) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x12, xnn_init_f32_elu_neon_rr2_lut16_p3_params);
    }
  }

  // Batch sizes 1 through 11.
  TEST(F32_VELU__NEON_RR2_LUT16_P3_X12, batch_lt_12) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size < 12; batch_size++) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x12, xnn_init_f32_elu_neon_rr2_lut16_p3_params);
    }
  }

  // Batch sizes 13 through 23.
  TEST(F32_VELU__NEON_RR2_LUT16_P3_X12, batch_gt_12) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 13; batch_size < 24; batch_size++) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x12, xnn_init_f32_elu_neon_rr2_lut16_p3_params);
    }
  }

  // inplace(true) with batch sizes 1, 12, ..., 56.
  TEST(F32_VELU__NEON_RR2_LUT16_P3_X12, inplace) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 60; batch_size += 11) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .inplace(true)
        .Test(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x12, xnn_init_f32_elu_neon_rr2_lut16_p3_params);
    }
  }

  // prescale values 0.1 and 10.0, each with batch sizes 1, 12, ..., 56.
  TEST(F32_VELU__NEON_RR2_LUT16_P3_X12, prescale) {
    TEST_REQUIRES_ARM_NEON;
    for (float prescale : std::vector<float>({0.1f, 10.0f})) {
      for (size_t batch_size = 1; batch_size <= 60; batch_size += 11) {
        VUnaryMicrokernelTester()
          .batch_size(batch_size)
          .prescale(prescale)
          .Test(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x12, xnn_init_f32_elu_neon_rr2_lut16_p3_params);
      }
    }
  }

  // alpha values 0.3 and 3.0, each with batch sizes 1, 12, ..., 56.
  TEST(F32_VELU__NEON_RR2_LUT16_P3_X12, alpha) {
    TEST_REQUIRES_ARM_NEON;
    for (float alpha : std::vector<float>({0.3f, 3.0f})) {
      for (size_t batch_size = 1; batch_size <= 60; batch_size += 11) {
        VUnaryMicrokernelTester()
          .batch_size(batch_size)
          .alpha(alpha)
          .Test(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x12, xnn_init_f32_elu_neon_rr2_lut16_p3_params);
      }
    }
  }

  // beta values 0.3 and 3.0, each with batch sizes 1, 12, ..., 56.
  TEST(F32_VELU__NEON_RR2_LUT16_P3_X12, beta) {
    TEST_REQUIRES_ARM_NEON;
    for (float beta : std::vector<float>({0.3f, 3.0f})) {
      for (size_t batch_size = 1; batch_size <= 60; batch_size += 11) {
        VUnaryMicrokernelTester()
          .batch_size(batch_size)
          .beta(beta)
          .Test(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x12, xnn_init_f32_elu_neon_rr2_lut16_p3_params);
      }
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
267 
268 
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // Tests for xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x16. Each test hands the
  // kernel and xnn_init_f32_elu_neon_rr2_lut16_p3_params to the tester's
  // Test() call. Every test starts with TEST_REQUIRES_ARM_NEON (presumably a
  // NEON availability check -- confirm in xnnpack/isa-checks.h).

  // Single run with batch size 16.
  TEST(F32_VELU__NEON_RR2_LUT16_P3_X16, batch_eq_16) {
    TEST_REQUIRES_ARM_NEON;
    VUnaryMicrokernelTester()
      .batch_size(16)
      .Test(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x16, xnn_init_f32_elu_neon_rr2_lut16_p3_params);
  }

  // Batch sizes 32, 48, ..., 144 (multiples of 16).
  TEST(F32_VELU__NEON_RR2_LUT16_P3_X16, batch_div_16) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 32; batch_size < 160; batch_size += 16) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x16, xnn_init_f32_elu_neon_rr2_lut16_p3_params);
    }
  }

  // Batch sizes 1 through 15.
  TEST(F32_VELU__NEON_RR2_LUT16_P3_X16, batch_lt_16) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size < 16; batch_size++) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x16, xnn_init_f32_elu_neon_rr2_lut16_p3_params);
    }
  }

  // Batch sizes 17 through 31.
  TEST(F32_VELU__NEON_RR2_LUT16_P3_X16, batch_gt_16) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 17; batch_size < 32; batch_size++) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x16, xnn_init_f32_elu_neon_rr2_lut16_p3_params);
    }
  }

  // inplace(true) with batch sizes 1, 16, ..., 76.
  TEST(F32_VELU__NEON_RR2_LUT16_P3_X16, inplace) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .inplace(true)
        .Test(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x16, xnn_init_f32_elu_neon_rr2_lut16_p3_params);
    }
  }

  // prescale values 0.1 and 10.0, each with batch sizes 1, 16, ..., 76.
  TEST(F32_VELU__NEON_RR2_LUT16_P3_X16, prescale) {
    TEST_REQUIRES_ARM_NEON;
    for (float prescale : std::vector<float>({0.1f, 10.0f})) {
      for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
        VUnaryMicrokernelTester()
          .batch_size(batch_size)
          .prescale(prescale)
          .Test(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x16, xnn_init_f32_elu_neon_rr2_lut16_p3_params);
      }
    }
  }

  // alpha values 0.3 and 3.0, each with batch sizes 1, 16, ..., 76.
  TEST(F32_VELU__NEON_RR2_LUT16_P3_X16, alpha) {
    TEST_REQUIRES_ARM_NEON;
    for (float alpha : std::vector<float>({0.3f, 3.0f})) {
      for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
        VUnaryMicrokernelTester()
          .batch_size(batch_size)
          .alpha(alpha)
          .Test(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x16, xnn_init_f32_elu_neon_rr2_lut16_p3_params);
      }
    }
  }

  // beta values 0.3 and 3.0, each with batch sizes 1, 16, ..., 76.
  TEST(F32_VELU__NEON_RR2_LUT16_P3_X16, beta) {
    TEST_REQUIRES_ARM_NEON;
    for (float beta : std::vector<float>({0.3f, 3.0f})) {
      for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
        VUnaryMicrokernelTester()
          .batch_size(batch_size)
          .beta(beta)
          .Test(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x16, xnn_init_f32_elu_neon_rr2_lut16_p3_params);
      }
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
350 
351 
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // Tests for xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x20. Each test hands the
  // kernel and xnn_init_f32_elu_neon_rr2_lut16_p3_params to the tester's
  // Test() call. Every test starts with TEST_REQUIRES_ARM_NEON (presumably a
  // NEON availability check -- confirm in xnnpack/isa-checks.h).

  // Single run with batch size 20.
  TEST(F32_VELU__NEON_RR2_LUT16_P3_X20, batch_eq_20) {
    TEST_REQUIRES_ARM_NEON;
    VUnaryMicrokernelTester()
      .batch_size(20)
      .Test(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x20, xnn_init_f32_elu_neon_rr2_lut16_p3_params);
  }

  // Batch sizes 40, 60, ..., 180 (multiples of 20).
  TEST(F32_VELU__NEON_RR2_LUT16_P3_X20, batch_div_20) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 40; batch_size < 200; batch_size += 20) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x20, xnn_init_f32_elu_neon_rr2_lut16_p3_params);
    }
  }

  // Batch sizes 1 through 19.
  TEST(F32_VELU__NEON_RR2_LUT16_P3_X20, batch_lt_20) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size < 20; batch_size++) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x20, xnn_init_f32_elu_neon_rr2_lut16_p3_params);
    }
  }

  // Batch sizes 21 through 39.
  TEST(F32_VELU__NEON_RR2_LUT16_P3_X20, batch_gt_20) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 21; batch_size < 40; batch_size++) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x20, xnn_init_f32_elu_neon_rr2_lut16_p3_params);
    }
  }

  // inplace(true) with batch sizes 1, 20, ..., 96.
  TEST(F32_VELU__NEON_RR2_LUT16_P3_X20, inplace) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 100; batch_size += 19) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .inplace(true)
        .Test(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x20, xnn_init_f32_elu_neon_rr2_lut16_p3_params);
    }
  }

  // prescale values 0.1 and 10.0, each with batch sizes 1, 20, ..., 96.
  TEST(F32_VELU__NEON_RR2_LUT16_P3_X20, prescale) {
    TEST_REQUIRES_ARM_NEON;
    for (float prescale : std::vector<float>({0.1f, 10.0f})) {
      for (size_t batch_size = 1; batch_size <= 100; batch_size += 19) {
        VUnaryMicrokernelTester()
          .batch_size(batch_size)
          .prescale(prescale)
          .Test(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x20, xnn_init_f32_elu_neon_rr2_lut16_p3_params);
      }
    }
  }

  // alpha values 0.3 and 3.0, each with batch sizes 1, 20, ..., 96.
  TEST(F32_VELU__NEON_RR2_LUT16_P3_X20, alpha) {
    TEST_REQUIRES_ARM_NEON;
    for (float alpha : std::vector<float>({0.3f, 3.0f})) {
      for (size_t batch_size = 1; batch_size <= 100; batch_size += 19) {
        VUnaryMicrokernelTester()
          .batch_size(batch_size)
          .alpha(alpha)
          .Test(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x20, xnn_init_f32_elu_neon_rr2_lut16_p3_params);
      }
    }
  }

  // beta values 0.3 and 3.0, each with batch sizes 1, 20, ..., 96.
  TEST(F32_VELU__NEON_RR2_LUT16_P3_X20, beta) {
    TEST_REQUIRES_ARM_NEON;
    for (float beta : std::vector<float>({0.3f, 3.0f})) {
      for (size_t batch_size = 1; batch_size <= 100; batch_size += 19) {
        VUnaryMicrokernelTester()
          .batch_size(batch_size)
          .beta(beta)
          .Test(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x20, xnn_init_f32_elu_neon_rr2_lut16_p3_params);
      }
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
433 
434 
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // Tests for xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x24. Each test hands the
  // kernel and xnn_init_f32_elu_neon_rr2_lut16_p3_params to the tester's
  // Test() call. Every test starts with TEST_REQUIRES_ARM_NEON (presumably a
  // NEON availability check -- confirm in xnnpack/isa-checks.h).

  // Single run with batch size 24.
  TEST(F32_VELU__NEON_RR2_LUT16_P3_X24, batch_eq_24) {
    TEST_REQUIRES_ARM_NEON;
    VUnaryMicrokernelTester()
      .batch_size(24)
      .Test(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x24, xnn_init_f32_elu_neon_rr2_lut16_p3_params);
  }

  // Batch sizes 48, 72, ..., 216 (multiples of 24).
  TEST(F32_VELU__NEON_RR2_LUT16_P3_X24, batch_div_24) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 48; batch_size < 240; batch_size += 24) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x24, xnn_init_f32_elu_neon_rr2_lut16_p3_params);
    }
  }

  // Batch sizes 1 through 23.
  TEST(F32_VELU__NEON_RR2_LUT16_P3_X24, batch_lt_24) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size < 24; batch_size++) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x24, xnn_init_f32_elu_neon_rr2_lut16_p3_params);
    }
  }

  // Batch sizes 25 through 47.
  TEST(F32_VELU__NEON_RR2_LUT16_P3_X24, batch_gt_24) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 25; batch_size < 48; batch_size++) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x24, xnn_init_f32_elu_neon_rr2_lut16_p3_params);
    }
  }

  // inplace(true) with batch sizes 1, 24, ..., 116.
  TEST(F32_VELU__NEON_RR2_LUT16_P3_X24, inplace) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .inplace(true)
        .Test(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x24, xnn_init_f32_elu_neon_rr2_lut16_p3_params);
    }
  }

  // prescale values 0.1 and 10.0, each with batch sizes 1, 24, ..., 116.
  TEST(F32_VELU__NEON_RR2_LUT16_P3_X24, prescale) {
    TEST_REQUIRES_ARM_NEON;
    for (float prescale : std::vector<float>({0.1f, 10.0f})) {
      for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
        VUnaryMicrokernelTester()
          .batch_size(batch_size)
          .prescale(prescale)
          .Test(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x24, xnn_init_f32_elu_neon_rr2_lut16_p3_params);
      }
    }
  }

  // alpha values 0.3 and 3.0, each with batch sizes 1, 24, ..., 116.
  TEST(F32_VELU__NEON_RR2_LUT16_P3_X24, alpha) {
    TEST_REQUIRES_ARM_NEON;
    for (float alpha : std::vector<float>({0.3f, 3.0f})) {
      for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
        VUnaryMicrokernelTester()
          .batch_size(batch_size)
          .alpha(alpha)
          .Test(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x24, xnn_init_f32_elu_neon_rr2_lut16_p3_params);
      }
    }
  }

  // beta values 0.3 and 3.0, each with batch sizes 1, 24, ..., 116.
  TEST(F32_VELU__NEON_RR2_LUT16_P3_X24, beta) {
    TEST_REQUIRES_ARM_NEON;
    for (float beta : std::vector<float>({0.3f, 3.0f})) {
      for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
        VUnaryMicrokernelTester()
          .batch_size(batch_size)
          .beta(beta)
          .Test(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x24, xnn_init_f32_elu_neon_rr2_lut16_p3_params);
      }
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
516 
517 
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // Tests for xnn_f32_velu_ukernel__neon_rr2_p6_x4. Each test hands the kernel
  // and xnn_init_f32_elu_neon_rr2_p6_params to the tester's Test() call.
  // Every test starts with TEST_REQUIRES_ARM_NEON (presumably a NEON
  // availability check -- confirm in xnnpack/isa-checks.h).

  // Single run with batch size 4.
  TEST(F32_VELU__NEON_RR2_P6_X4, batch_eq_4) {
    TEST_REQUIRES_ARM_NEON;
    VUnaryMicrokernelTester()
      .batch_size(4)
      .Test(xnn_f32_velu_ukernel__neon_rr2_p6_x4, xnn_init_f32_elu_neon_rr2_p6_params);
  }

  // Batch sizes 8, 12, ..., 36 (multiples of 4).
  TEST(F32_VELU__NEON_RR2_P6_X4, batch_div_4) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 8; batch_size < 40; batch_size += 4) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_velu_ukernel__neon_rr2_p6_x4, xnn_init_f32_elu_neon_rr2_p6_params);
    }
  }

  // Batch sizes 1 through 3.
  TEST(F32_VELU__NEON_RR2_P6_X4, batch_lt_4) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size < 4; batch_size++) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_velu_ukernel__neon_rr2_p6_x4, xnn_init_f32_elu_neon_rr2_p6_params);
    }
  }

  // Batch sizes 5 through 7.
  TEST(F32_VELU__NEON_RR2_P6_X4, batch_gt_4) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 5; batch_size < 8; batch_size++) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_velu_ukernel__neon_rr2_p6_x4, xnn_init_f32_elu_neon_rr2_p6_params);
    }
  }

  // inplace(true) with batch sizes 1, 4, ..., 19.
  TEST(F32_VELU__NEON_RR2_P6_X4, inplace) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .inplace(true)
        .Test(xnn_f32_velu_ukernel__neon_rr2_p6_x4, xnn_init_f32_elu_neon_rr2_p6_params);
    }
  }

  // prescale values 0.1 and 10.0, each with batch sizes 1, 4, ..., 19.
  TEST(F32_VELU__NEON_RR2_P6_X4, prescale) {
    TEST_REQUIRES_ARM_NEON;
    for (float prescale : std::vector<float>({0.1f, 10.0f})) {
      for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
        VUnaryMicrokernelTester()
          .batch_size(batch_size)
          .prescale(prescale)
          .Test(xnn_f32_velu_ukernel__neon_rr2_p6_x4, xnn_init_f32_elu_neon_rr2_p6_params);
      }
    }
  }

  // alpha values 0.3 and 3.0, each with batch sizes 1, 4, ..., 19.
  TEST(F32_VELU__NEON_RR2_P6_X4, alpha) {
    TEST_REQUIRES_ARM_NEON;
    for (float alpha : std::vector<float>({0.3f, 3.0f})) {
      for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
        VUnaryMicrokernelTester()
          .batch_size(batch_size)
          .alpha(alpha)
          .Test(xnn_f32_velu_ukernel__neon_rr2_p6_x4, xnn_init_f32_elu_neon_rr2_p6_params);
      }
    }
  }

  // beta values 0.3 and 3.0, each with batch sizes 1, 4, ..., 19.
  TEST(F32_VELU__NEON_RR2_P6_X4, beta) {
    TEST_REQUIRES_ARM_NEON;
    for (float beta : std::vector<float>({0.3f, 3.0f})) {
      for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
        VUnaryMicrokernelTester()
          .batch_size(batch_size)
          .beta(beta)
          .Test(xnn_f32_velu_ukernel__neon_rr2_p6_x4, xnn_init_f32_elu_neon_rr2_p6_params);
      }
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
599 
600 
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // Tests for xnn_f32_velu_ukernel__neon_rr2_p6_x8. Each test hands the kernel
  // and xnn_init_f32_elu_neon_rr2_p6_params to the tester's Test() call.
  // Every test starts with TEST_REQUIRES_ARM_NEON (presumably a NEON
  // availability check -- confirm in xnnpack/isa-checks.h).

  // Single run with batch size 8.
  TEST(F32_VELU__NEON_RR2_P6_X8, batch_eq_8) {
    TEST_REQUIRES_ARM_NEON;
    VUnaryMicrokernelTester()
      .batch_size(8)
      .Test(xnn_f32_velu_ukernel__neon_rr2_p6_x8, xnn_init_f32_elu_neon_rr2_p6_params);
  }

  // Batch sizes 16, 24, ..., 72 (multiples of 8).
  TEST(F32_VELU__NEON_RR2_P6_X8, batch_div_8) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 16; batch_size < 80; batch_size += 8) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_velu_ukernel__neon_rr2_p6_x8, xnn_init_f32_elu_neon_rr2_p6_params);
    }
  }

  // Batch sizes 1 through 7.
  TEST(F32_VELU__NEON_RR2_P6_X8, batch_lt_8) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size < 8; batch_size++) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_velu_ukernel__neon_rr2_p6_x8, xnn_init_f32_elu_neon_rr2_p6_params);
    }
  }

  // Batch sizes 9 through 15.
  TEST(F32_VELU__NEON_RR2_P6_X8, batch_gt_8) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 9; batch_size < 16; batch_size++) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_velu_ukernel__neon_rr2_p6_x8, xnn_init_f32_elu_neon_rr2_p6_params);
    }
  }

  // inplace(true) with batch sizes 1, 8, ..., 36.
  TEST(F32_VELU__NEON_RR2_P6_X8, inplace) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .inplace(true)
        .Test(xnn_f32_velu_ukernel__neon_rr2_p6_x8, xnn_init_f32_elu_neon_rr2_p6_params);
    }
  }

  // prescale values 0.1 and 10.0, each with batch sizes 1, 8, ..., 36.
  TEST(F32_VELU__NEON_RR2_P6_X8, prescale) {
    TEST_REQUIRES_ARM_NEON;
    for (float prescale : std::vector<float>({0.1f, 10.0f})) {
      for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
        VUnaryMicrokernelTester()
          .batch_size(batch_size)
          .prescale(prescale)
          .Test(xnn_f32_velu_ukernel__neon_rr2_p6_x8, xnn_init_f32_elu_neon_rr2_p6_params);
      }
    }
  }

  // alpha values 0.3 and 3.0, each with batch sizes 1, 8, ..., 36.
  TEST(F32_VELU__NEON_RR2_P6_X8, alpha) {
    TEST_REQUIRES_ARM_NEON;
    for (float alpha : std::vector<float>({0.3f, 3.0f})) {
      for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
        VUnaryMicrokernelTester()
          .batch_size(batch_size)
          .alpha(alpha)
          .Test(xnn_f32_velu_ukernel__neon_rr2_p6_x8, xnn_init_f32_elu_neon_rr2_p6_params);
      }
    }
  }

  // beta values 0.3 and 3.0, each with batch sizes 1, 8, ..., 36.
  TEST(F32_VELU__NEON_RR2_P6_X8, beta) {
    TEST_REQUIRES_ARM_NEON;
    for (float beta : std::vector<float>({0.3f, 3.0f})) {
      for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
        VUnaryMicrokernelTester()
          .batch_size(batch_size)
          .beta(beta)
          .Test(xnn_f32_velu_ukernel__neon_rr2_p6_x8, xnn_init_f32_elu_neon_rr2_p6_params);
      }
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
682 
683 
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  // Tests for xnn_f32_velu_ukernel__neon_rr2_p6_x12. Each test hands the
  // kernel and xnn_init_f32_elu_neon_rr2_p6_params to the tester's Test()
  // call. Every test starts with TEST_REQUIRES_ARM_NEON (presumably a NEON
  // availability check -- confirm in xnnpack/isa-checks.h).

  // Single run with batch size 12.
  TEST(F32_VELU__NEON_RR2_P6_X12, batch_eq_12) {
    TEST_REQUIRES_ARM_NEON;
    VUnaryMicrokernelTester()
      .batch_size(12)
      .Test(xnn_f32_velu_ukernel__neon_rr2_p6_x12, xnn_init_f32_elu_neon_rr2_p6_params);
  }

  // Batch sizes 24, 36, ..., 108 (multiples of 12).
  TEST(F32_VELU__NEON_RR2_P6_X12, batch_div_12) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 24; batch_size < 120; batch_size += 12) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_velu_ukernel__neon_rr2_p6_x12, xnn_init_f32_elu_neon_rr2_p6_params);
    }
  }

  // Batch sizes 1 through 11.
  TEST(F32_VELU__NEON_RR2_P6_X12, batch_lt_12) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size < 12; batch_size++) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_velu_ukernel__neon_rr2_p6_x12, xnn_init_f32_elu_neon_rr2_p6_params);
    }
  }

  // Batch sizes 13 through 23.
  TEST(F32_VELU__NEON_RR2_P6_X12, batch_gt_12) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 13; batch_size < 24; batch_size++) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .Test(xnn_f32_velu_ukernel__neon_rr2_p6_x12, xnn_init_f32_elu_neon_rr2_p6_params);
    }
  }

  // inplace(true) with batch sizes 1, 12, ..., 56.
  TEST(F32_VELU__NEON_RR2_P6_X12, inplace) {
    TEST_REQUIRES_ARM_NEON;
    for (size_t batch_size = 1; batch_size <= 60; batch_size += 11) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .inplace(true)
        .Test(xnn_f32_velu_ukernel__neon_rr2_p6_x12, xnn_init_f32_elu_neon_rr2_p6_params);
    }
  }

  // prescale values 0.1 and 10.0, each with batch sizes 1, 12, ..., 56.
  TEST(F32_VELU__NEON_RR2_P6_X12, prescale) {
    TEST_REQUIRES_ARM_NEON;
    for (float prescale : std::vector<float>({0.1f, 10.0f})) {
      for (size_t batch_size = 1; batch_size <= 60; batch_size += 11) {
        VUnaryMicrokernelTester()
          .batch_size(batch_size)
          .prescale(prescale)
          .Test(xnn_f32_velu_ukernel__neon_rr2_p6_x12, xnn_init_f32_elu_neon_rr2_p6_params);
      }
    }
  }

  // alpha values 0.3 and 3.0, each with batch sizes 1, 12, ..., 56.
  TEST(F32_VELU__NEON_RR2_P6_X12, alpha) {
    TEST_REQUIRES_ARM_NEON;
    for (float alpha : std::vector<float>({0.3f, 3.0f})) {
      for (size_t batch_size = 1; batch_size <= 60; batch_size += 11) {
        VUnaryMicrokernelTester()
          .batch_size(batch_size)
          .alpha(alpha)
          .Test(xnn_f32_velu_ukernel__neon_rr2_p6_x12, xnn_init_f32_elu_neon_rr2_p6_params);
      }
    }
  }

  // beta values 0.3 and 3.0, each with batch sizes 1, 12, ..., 56.
  TEST(F32_VELU__NEON_RR2_P6_X12, beta) {
    TEST_REQUIRES_ARM_NEON;
    for (float beta : std::vector<float>({0.3f, 3.0f})) {
      for (size_t batch_size = 1; batch_size <= 60; batch_size += 11) {
        VUnaryMicrokernelTester()
          .batch_size(batch_size)
          .beta(beta)
          .Test(xnn_f32_velu_ukernel__neon_rr2_p6_x12, xnn_init_f32_elu_neon_rr2_p6_params);
      }
    }
  }
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
765 
766 
767 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run of the neon_rr2_p6_x16 kernel with batch_size = 16.
TEST(F32_VELU__NEON_RR2_P6_X16, batch_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  VUnaryMicrokernelTester tester;
  tester.batch_size(16);
  tester.Test(xnn_f32_velu_ukernel__neon_rr2_p6_x16, xnn_init_f32_elu_neon_rr2_p6_params);
}
774 
// Runs the neon_rr2_p6_x16 kernel for batch_size = 32, 48, ..., 144
// (multiples of 16 below 160).
TEST(F32_VELU__NEON_RR2_P6_X16, batch_div_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 32; n < 160; n += 16) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_velu_ukernel__neon_rr2_p6_x16, xnn_init_f32_elu_neon_rr2_p6_params);
  }
}
783 
// Runs the neon_rr2_p6_x16 kernel once for every batch_size in 1..15.
TEST(F32_VELU__NEON_RR2_P6_X16, batch_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n < 16; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_velu_ukernel__neon_rr2_p6_x16, xnn_init_f32_elu_neon_rr2_p6_params);
  }
}
792 
// Runs the neon_rr2_p6_x16 kernel once for every batch_size in 17..31.
TEST(F32_VELU__NEON_RR2_P6_X16, batch_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 17; n < 32; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_velu_ukernel__neon_rr2_p6_x16, xnn_init_f32_elu_neon_rr2_p6_params);
  }
}
801 
// Runs the neon_rr2_p6_x16 kernel with inplace(true) for
// batch_size = 1, 16, 31, ..., 76 (stride 15, upper bound 80).
TEST(F32_VELU__NEON_RR2_P6_X16, inplace) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 80; n += 15) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.inplace(true);
    tester.Test(xnn_f32_velu_ukernel__neon_rr2_p6_x16, xnn_init_f32_elu_neon_rr2_p6_params);
  }
}
811 
// Runs the neon_rr2_p6_x16 kernel for prescale values 0.1 and 10, each over
// batch_size = 1, 16, 31, ..., 76 (stride 15, upper bound 80).
TEST(F32_VELU__NEON_RR2_P6_X16, prescale) {
  TEST_REQUIRES_ARM_NEON;
  const std::vector<float> prescale_values = {0.1f, 10.0f};
  for (const float scale : prescale_values) {
    for (size_t n = 1; n <= 80; n += 15) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n);
      tester.prescale(scale);
      tester.Test(xnn_f32_velu_ukernel__neon_rr2_p6_x16, xnn_init_f32_elu_neon_rr2_p6_params);
    }
  }
}
823 
// Runs the neon_rr2_p6_x16 kernel for alpha values 0.3 and 3, each over
// batch_size = 1, 16, 31, ..., 76 (stride 15, upper bound 80).
TEST(F32_VELU__NEON_RR2_P6_X16, alpha) {
  TEST_REQUIRES_ARM_NEON;
  const std::vector<float> alpha_values = {0.3f, 3.0f};
  for (const float alpha_value : alpha_values) {
    for (size_t n = 1; n <= 80; n += 15) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n);
      tester.alpha(alpha_value);
      tester.Test(xnn_f32_velu_ukernel__neon_rr2_p6_x16, xnn_init_f32_elu_neon_rr2_p6_params);
    }
  }
}
835 
// Runs the neon_rr2_p6_x16 kernel for beta values 0.3 and 3, each over
// batch_size = 1, 16, 31, ..., 76 (stride 15, upper bound 80).
TEST(F32_VELU__NEON_RR2_P6_X16, beta) {
  TEST_REQUIRES_ARM_NEON;
  const std::vector<float> beta_values = {0.3f, 3.0f};
  for (const float beta_value : beta_values) {
    for (size_t n = 1; n <= 80; n += 15) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n);
      tester.beta(beta_value);
      tester.Test(xnn_f32_velu_ukernel__neon_rr2_p6_x16, xnn_init_f32_elu_neon_rr2_p6_params);
    }
  }
}
847 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
848 
849 
850 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run of the neon_rr2_p6_x20 kernel with batch_size = 20.
TEST(F32_VELU__NEON_RR2_P6_X20, batch_eq_20) {
  TEST_REQUIRES_ARM_NEON;
  VUnaryMicrokernelTester tester;
  tester.batch_size(20);
  tester.Test(xnn_f32_velu_ukernel__neon_rr2_p6_x20, xnn_init_f32_elu_neon_rr2_p6_params);
}
857 
// Runs the neon_rr2_p6_x20 kernel for batch_size = 40, 60, ..., 180
// (multiples of 20 below 200).
TEST(F32_VELU__NEON_RR2_P6_X20, batch_div_20) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 40; n < 200; n += 20) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_velu_ukernel__neon_rr2_p6_x20, xnn_init_f32_elu_neon_rr2_p6_params);
  }
}
866 
// Runs the neon_rr2_p6_x20 kernel once for every batch_size in 1..19.
TEST(F32_VELU__NEON_RR2_P6_X20, batch_lt_20) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n < 20; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_velu_ukernel__neon_rr2_p6_x20, xnn_init_f32_elu_neon_rr2_p6_params);
  }
}
875 
// Runs the neon_rr2_p6_x20 kernel once for every batch_size in 21..39.
TEST(F32_VELU__NEON_RR2_P6_X20, batch_gt_20) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 21; n < 40; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_velu_ukernel__neon_rr2_p6_x20, xnn_init_f32_elu_neon_rr2_p6_params);
  }
}
884 
// Runs the neon_rr2_p6_x20 kernel with inplace(true) for
// batch_size = 1, 20, 39, ..., 96 (stride 19, upper bound 100).
TEST(F32_VELU__NEON_RR2_P6_X20, inplace) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 100; n += 19) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.inplace(true);
    tester.Test(xnn_f32_velu_ukernel__neon_rr2_p6_x20, xnn_init_f32_elu_neon_rr2_p6_params);
  }
}
894 
// Runs the neon_rr2_p6_x20 kernel for prescale values 0.1 and 10, each over
// batch_size = 1, 20, 39, ..., 96 (stride 19, upper bound 100).
TEST(F32_VELU__NEON_RR2_P6_X20, prescale) {
  TEST_REQUIRES_ARM_NEON;
  const std::vector<float> prescale_values = {0.1f, 10.0f};
  for (const float scale : prescale_values) {
    for (size_t n = 1; n <= 100; n += 19) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n);
      tester.prescale(scale);
      tester.Test(xnn_f32_velu_ukernel__neon_rr2_p6_x20, xnn_init_f32_elu_neon_rr2_p6_params);
    }
  }
}
906 
// Runs the neon_rr2_p6_x20 kernel for alpha values 0.3 and 3, each over
// batch_size = 1, 20, 39, ..., 96 (stride 19, upper bound 100).
TEST(F32_VELU__NEON_RR2_P6_X20, alpha) {
  TEST_REQUIRES_ARM_NEON;
  const std::vector<float> alpha_values = {0.3f, 3.0f};
  for (const float alpha_value : alpha_values) {
    for (size_t n = 1; n <= 100; n += 19) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n);
      tester.alpha(alpha_value);
      tester.Test(xnn_f32_velu_ukernel__neon_rr2_p6_x20, xnn_init_f32_elu_neon_rr2_p6_params);
    }
  }
}
918 
// Runs the neon_rr2_p6_x20 kernel for beta values 0.3 and 3, each over
// batch_size = 1, 20, 39, ..., 96 (stride 19, upper bound 100).
TEST(F32_VELU__NEON_RR2_P6_X20, beta) {
  TEST_REQUIRES_ARM_NEON;
  const std::vector<float> beta_values = {0.3f, 3.0f};
  for (const float beta_value : beta_values) {
    for (size_t n = 1; n <= 100; n += 19) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n);
      tester.beta(beta_value);
      tester.Test(xnn_f32_velu_ukernel__neon_rr2_p6_x20, xnn_init_f32_elu_neon_rr2_p6_params);
    }
  }
}
930 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
931 
932 
933 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run of the neon_rr2_p6_x24 kernel with batch_size = 24.
TEST(F32_VELU__NEON_RR2_P6_X24, batch_eq_24) {
  TEST_REQUIRES_ARM_NEON;
  VUnaryMicrokernelTester tester;
  tester.batch_size(24);
  tester.Test(xnn_f32_velu_ukernel__neon_rr2_p6_x24, xnn_init_f32_elu_neon_rr2_p6_params);
}
940 
// Runs the neon_rr2_p6_x24 kernel for batch_size = 48, 72, ..., 216
// (multiples of 24 below 240).
TEST(F32_VELU__NEON_RR2_P6_X24, batch_div_24) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 48; n < 240; n += 24) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_velu_ukernel__neon_rr2_p6_x24, xnn_init_f32_elu_neon_rr2_p6_params);
  }
}
949 
// Runs the neon_rr2_p6_x24 kernel once for every batch_size in 1..23.
TEST(F32_VELU__NEON_RR2_P6_X24, batch_lt_24) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n < 24; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_velu_ukernel__neon_rr2_p6_x24, xnn_init_f32_elu_neon_rr2_p6_params);
  }
}
958 
// Runs the neon_rr2_p6_x24 kernel once for every batch_size in 25..47.
TEST(F32_VELU__NEON_RR2_P6_X24, batch_gt_24) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 25; n < 48; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_velu_ukernel__neon_rr2_p6_x24, xnn_init_f32_elu_neon_rr2_p6_params);
  }
}
967 
// Runs the neon_rr2_p6_x24 kernel with inplace(true) for
// batch_size = 1, 24, 47, ..., 116 (stride 23, upper bound 120).
TEST(F32_VELU__NEON_RR2_P6_X24, inplace) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t n = 1; n <= 120; n += 23) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.inplace(true);
    tester.Test(xnn_f32_velu_ukernel__neon_rr2_p6_x24, xnn_init_f32_elu_neon_rr2_p6_params);
  }
}
977 
// Runs the neon_rr2_p6_x24 kernel for prescale values 0.1 and 10, each over
// batch_size = 1, 24, 47, ..., 116 (stride 23, upper bound 120).
TEST(F32_VELU__NEON_RR2_P6_X24, prescale) {
  TEST_REQUIRES_ARM_NEON;
  const std::vector<float> prescale_values = {0.1f, 10.0f};
  for (const float scale : prescale_values) {
    for (size_t n = 1; n <= 120; n += 23) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n);
      tester.prescale(scale);
      tester.Test(xnn_f32_velu_ukernel__neon_rr2_p6_x24, xnn_init_f32_elu_neon_rr2_p6_params);
    }
  }
}
989 
// Runs the neon_rr2_p6_x24 kernel for alpha values 0.3 and 3, each over
// batch_size = 1, 24, 47, ..., 116 (stride 23, upper bound 120).
TEST(F32_VELU__NEON_RR2_P6_X24, alpha) {
  TEST_REQUIRES_ARM_NEON;
  const std::vector<float> alpha_values = {0.3f, 3.0f};
  for (const float alpha_value : alpha_values) {
    for (size_t n = 1; n <= 120; n += 23) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n);
      tester.alpha(alpha_value);
      tester.Test(xnn_f32_velu_ukernel__neon_rr2_p6_x24, xnn_init_f32_elu_neon_rr2_p6_params);
    }
  }
}
1001 
// Runs the neon_rr2_p6_x24 kernel for beta values 0.3 and 3, each over
// batch_size = 1, 24, 47, ..., 116 (stride 23, upper bound 120).
TEST(F32_VELU__NEON_RR2_P6_X24, beta) {
  TEST_REQUIRES_ARM_NEON;
  const std::vector<float> beta_values = {0.3f, 3.0f};
  for (const float beta_value : beta_values) {
    for (size_t n = 1; n <= 120; n += 23) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n);
      tester.beta(beta_value);
      tester.Test(xnn_f32_velu_ukernel__neon_rr2_p6_x24, xnn_init_f32_elu_neon_rr2_p6_params);
    }
  }
}
1013 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
1014 
1015 
1016 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run of the neonfma_rr1_lut16_p3_x4 kernel with batch_size = 4.
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X4, batch_eq_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  VUnaryMicrokernelTester tester;
  tester.batch_size(4);
  tester.Test(xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x4, xnn_init_f32_elu_neonfma_rr1_lut16_p3_params);
}
1023 
// Runs the neonfma_rr1_lut16_p3_x4 kernel for batch_size = 8, 12, ..., 36
// (multiples of 4 below 40).
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X4, batch_div_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 8; n < 40; n += 4) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x4, xnn_init_f32_elu_neonfma_rr1_lut16_p3_params);
  }
}
1032 
// Runs the neonfma_rr1_lut16_p3_x4 kernel once for every batch_size in 1..3.
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X4, batch_lt_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n < 4; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x4, xnn_init_f32_elu_neonfma_rr1_lut16_p3_params);
  }
}
1041 
// Runs the neonfma_rr1_lut16_p3_x4 kernel once for every batch_size in 5..7.
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X4, batch_gt_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 5; n < 8; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x4, xnn_init_f32_elu_neonfma_rr1_lut16_p3_params);
  }
}
1050 
// Runs the neonfma_rr1_lut16_p3_x4 kernel with inplace(true) for
// batch_size = 1, 4, 7, ..., 19 (stride 3, upper bound 20).
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X4, inplace) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n <= 20; n += 3) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.inplace(true);
    tester.Test(xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x4, xnn_init_f32_elu_neonfma_rr1_lut16_p3_params);
  }
}
1060 
// Runs the neonfma_rr1_lut16_p3_x4 kernel for prescale values 0.1 and 10,
// each over batch_size = 1, 4, 7, ..., 19 (stride 3, upper bound 20).
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X4, prescale) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const std::vector<float> prescale_values = {0.1f, 10.0f};
  for (const float scale : prescale_values) {
    for (size_t n = 1; n <= 20; n += 3) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n);
      tester.prescale(scale);
      tester.Test(xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x4, xnn_init_f32_elu_neonfma_rr1_lut16_p3_params);
    }
  }
}
1072 
// Runs the neonfma_rr1_lut16_p3_x4 kernel for alpha values 0.3 and 3,
// each over batch_size = 1, 4, 7, ..., 19 (stride 3, upper bound 20).
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X4, alpha) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const std::vector<float> alpha_values = {0.3f, 3.0f};
  for (const float alpha_value : alpha_values) {
    for (size_t n = 1; n <= 20; n += 3) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n);
      tester.alpha(alpha_value);
      tester.Test(xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x4, xnn_init_f32_elu_neonfma_rr1_lut16_p3_params);
    }
  }
}
1084 
// Runs the neonfma_rr1_lut16_p3_x4 kernel for beta values 0.3 and 3,
// each over batch_size = 1, 4, 7, ..., 19 (stride 3, upper bound 20).
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X4, beta) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const std::vector<float> beta_values = {0.3f, 3.0f};
  for (const float beta_value : beta_values) {
    for (size_t n = 1; n <= 20; n += 3) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n);
      tester.beta(beta_value);
      tester.Test(xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x4, xnn_init_f32_elu_neonfma_rr1_lut16_p3_params);
    }
  }
}
1096 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
1097 
1098 
1099 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run of the neonfma_rr1_lut16_p3_x8 kernel with batch_size = 8.
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X8, batch_eq_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  VUnaryMicrokernelTester tester;
  tester.batch_size(8);
  tester.Test(xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x8, xnn_init_f32_elu_neonfma_rr1_lut16_p3_params);
}
1106 
// Runs the neonfma_rr1_lut16_p3_x8 kernel for batch_size = 16, 24, ..., 72
// (multiples of 8 below 80).
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X8, batch_div_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 16; n < 80; n += 8) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x8, xnn_init_f32_elu_neonfma_rr1_lut16_p3_params);
  }
}
1115 
// Runs the neonfma_rr1_lut16_p3_x8 kernel once for every batch_size in 1..7.
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X8, batch_lt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n < 8; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x8, xnn_init_f32_elu_neonfma_rr1_lut16_p3_params);
  }
}
1124 
// Runs the neonfma_rr1_lut16_p3_x8 kernel once for every batch_size in 9..15.
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X8, batch_gt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 9; n < 16; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x8, xnn_init_f32_elu_neonfma_rr1_lut16_p3_params);
  }
}
1133 
// Runs the neonfma_rr1_lut16_p3_x8 kernel with inplace(true) for
// batch_size = 1, 8, 15, ..., 36 (stride 7, upper bound 40).
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X8, inplace) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n <= 40; n += 7) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.inplace(true);
    tester.Test(xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x8, xnn_init_f32_elu_neonfma_rr1_lut16_p3_params);
  }
}
1143 
// Runs the neonfma_rr1_lut16_p3_x8 kernel for prescale values 0.1 and 10,
// each over batch_size = 1, 8, 15, ..., 36 (stride 7, upper bound 40).
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X8, prescale) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const std::vector<float> prescale_values = {0.1f, 10.0f};
  for (const float scale : prescale_values) {
    for (size_t n = 1; n <= 40; n += 7) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n);
      tester.prescale(scale);
      tester.Test(xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x8, xnn_init_f32_elu_neonfma_rr1_lut16_p3_params);
    }
  }
}
1155 
// Runs the neonfma_rr1_lut16_p3_x8 kernel for alpha values 0.3 and 3,
// each over batch_size = 1, 8, 15, ..., 36 (stride 7, upper bound 40).
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X8, alpha) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const std::vector<float> alpha_values = {0.3f, 3.0f};
  for (const float alpha_value : alpha_values) {
    for (size_t n = 1; n <= 40; n += 7) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n);
      tester.alpha(alpha_value);
      tester.Test(xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x8, xnn_init_f32_elu_neonfma_rr1_lut16_p3_params);
    }
  }
}
1167 
// Runs the neonfma_rr1_lut16_p3_x8 kernel for beta values 0.3 and 3,
// each over batch_size = 1, 8, 15, ..., 36 (stride 7, upper bound 40).
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X8, beta) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const std::vector<float> beta_values = {0.3f, 3.0f};
  for (const float beta_value : beta_values) {
    for (size_t n = 1; n <= 40; n += 7) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n);
      tester.beta(beta_value);
      tester.Test(xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x8, xnn_init_f32_elu_neonfma_rr1_lut16_p3_params);
    }
  }
}
1179 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
1180 
1181 
1182 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run of the neonfma_rr1_lut16_p3_x12 kernel with batch_size = 12.
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X12, batch_eq_12) {
  TEST_REQUIRES_ARM_NEON_FMA;
  VUnaryMicrokernelTester tester;
  tester.batch_size(12);
  tester.Test(xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x12, xnn_init_f32_elu_neonfma_rr1_lut16_p3_params);
}
1189 
// Runs the neonfma_rr1_lut16_p3_x12 kernel for batch_size = 24, 36, ..., 108
// (multiples of 12 below 120).
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X12, batch_div_12) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 24; n < 120; n += 12) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x12, xnn_init_f32_elu_neonfma_rr1_lut16_p3_params);
  }
}
1198 
// Runs the neonfma_rr1_lut16_p3_x12 kernel once for every batch_size in 1..11.
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X12, batch_lt_12) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n < 12; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x12, xnn_init_f32_elu_neonfma_rr1_lut16_p3_params);
  }
}
1207 
// Runs the neonfma_rr1_lut16_p3_x12 kernel once for every batch_size in 13..23.
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X12, batch_gt_12) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 13; n < 24; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x12, xnn_init_f32_elu_neonfma_rr1_lut16_p3_params);
  }
}
1216 
// Runs the neonfma_rr1_lut16_p3_x12 kernel with inplace(true) for
// batch_size = 1, 12, 23, ..., 56 (stride 11, upper bound 60).
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X12, inplace) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n <= 60; n += 11) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.inplace(true);
    tester.Test(xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x12, xnn_init_f32_elu_neonfma_rr1_lut16_p3_params);
  }
}
1226 
// Runs the neonfma_rr1_lut16_p3_x12 kernel for prescale values 0.1 and 10,
// each over batch_size = 1, 12, 23, ..., 56 (stride 11, upper bound 60).
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X12, prescale) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const std::vector<float> prescale_values = {0.1f, 10.0f};
  for (const float scale : prescale_values) {
    for (size_t n = 1; n <= 60; n += 11) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n);
      tester.prescale(scale);
      tester.Test(xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x12, xnn_init_f32_elu_neonfma_rr1_lut16_p3_params);
    }
  }
}
1238 
// Runs the neonfma_rr1_lut16_p3_x12 kernel for alpha values 0.3 and 3,
// each over batch_size = 1, 12, 23, ..., 56 (stride 11, upper bound 60).
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X12, alpha) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const std::vector<float> alpha_values = {0.3f, 3.0f};
  for (const float alpha_value : alpha_values) {
    for (size_t n = 1; n <= 60; n += 11) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n);
      tester.alpha(alpha_value);
      tester.Test(xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x12, xnn_init_f32_elu_neonfma_rr1_lut16_p3_params);
    }
  }
}
1250 
// Runs the neonfma_rr1_lut16_p3_x12 kernel for beta values 0.3 and 3,
// each over batch_size = 1, 12, 23, ..., 56 (stride 11, upper bound 60).
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X12, beta) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const std::vector<float> beta_values = {0.3f, 3.0f};
  for (const float beta_value : beta_values) {
    for (size_t n = 1; n <= 60; n += 11) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n);
      tester.beta(beta_value);
      tester.Test(xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x12, xnn_init_f32_elu_neonfma_rr1_lut16_p3_params);
    }
  }
}
1262 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
1263 
1264 
1265 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run of the neonfma_rr1_lut16_p3_x16 kernel with batch_size = 16.
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X16, batch_eq_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  VUnaryMicrokernelTester tester;
  tester.batch_size(16);
  tester.Test(xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x16, xnn_init_f32_elu_neonfma_rr1_lut16_p3_params);
}
1272 
// Runs the neonfma_rr1_lut16_p3_x16 kernel for batch_size = 32, 48, ..., 144
// (multiples of 16 below 160).
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X16, batch_div_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 32; n < 160; n += 16) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x16, xnn_init_f32_elu_neonfma_rr1_lut16_p3_params);
  }
}
1281 
// Runs the neonfma_rr1_lut16_p3_x16 kernel once for every batch_size in 1..15.
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X16, batch_lt_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n < 16; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x16, xnn_init_f32_elu_neonfma_rr1_lut16_p3_params);
  }
}
1290 
// Runs the neonfma_rr1_lut16_p3_x16 kernel once for every batch_size in 17..31.
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X16, batch_gt_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 17; n < 32; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x16, xnn_init_f32_elu_neonfma_rr1_lut16_p3_params);
  }
}
1299 
// Runs the neonfma_rr1_lut16_p3_x16 kernel with inplace(true) for
// batch_size = 1, 16, 31, ..., 76 (stride 15, upper bound 80).
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X16, inplace) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n <= 80; n += 15) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.inplace(true);
    tester.Test(xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x16, xnn_init_f32_elu_neonfma_rr1_lut16_p3_params);
  }
}
1309 
// Runs the neonfma_rr1_lut16_p3_x16 kernel for prescale values 0.1 and 10,
// each over batch_size = 1, 16, 31, ..., 76 (stride 15, upper bound 80).
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X16, prescale) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const std::vector<float> prescale_values = {0.1f, 10.0f};
  for (const float scale : prescale_values) {
    for (size_t n = 1; n <= 80; n += 15) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n);
      tester.prescale(scale);
      tester.Test(xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x16, xnn_init_f32_elu_neonfma_rr1_lut16_p3_params);
    }
  }
}
1321 
// Runs the neonfma_rr1_lut16_p3_x16 kernel for alpha values 0.3 and 3,
// each over batch_size = 1, 16, 31, ..., 76 (stride 15, upper bound 80).
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X16, alpha) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const std::vector<float> alpha_values = {0.3f, 3.0f};
  for (const float alpha_value : alpha_values) {
    for (size_t n = 1; n <= 80; n += 15) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n);
      tester.alpha(alpha_value);
      tester.Test(xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x16, xnn_init_f32_elu_neonfma_rr1_lut16_p3_params);
    }
  }
}
1333 
// Runs the neonfma_rr1_lut16_p3_x16 kernel for beta values 0.3 and 3,
// each over batch_size = 1, 16, 31, ..., 76 (stride 15, upper bound 80).
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X16, beta) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const std::vector<float> beta_values = {0.3f, 3.0f};
  for (const float beta_value : beta_values) {
    for (size_t n = 1; n <= 80; n += 15) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n);
      tester.beta(beta_value);
      tester.Test(xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x16, xnn_init_f32_elu_neonfma_rr1_lut16_p3_params);
    }
  }
}
1345 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
1346 
1347 
1348 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run of the neonfma_rr1_lut16_p3_x20 kernel with batch_size = 20.
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X20, batch_eq_20) {
  TEST_REQUIRES_ARM_NEON_FMA;
  VUnaryMicrokernelTester tester;
  tester.batch_size(20);
  tester.Test(xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x20, xnn_init_f32_elu_neonfma_rr1_lut16_p3_params);
}
1355 
// Runs the neonfma_rr1_lut16_p3_x20 kernel for batch_size = 40, 60, ..., 180
// (multiples of 20 below 200).
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X20, batch_div_20) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 40; n < 200; n += 20) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x20, xnn_init_f32_elu_neonfma_rr1_lut16_p3_params);
  }
}
1364 
// Runs the neonfma_rr1_lut16_p3_x20 kernel once for every batch_size in 1..19.
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X20, batch_lt_20) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n < 20; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x20, xnn_init_f32_elu_neonfma_rr1_lut16_p3_params);
  }
}
1373 
// Runs the neonfma_rr1_lut16_p3_x20 kernel once for every batch_size in 21..39.
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X20, batch_gt_20) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 21; n < 40; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x20, xnn_init_f32_elu_neonfma_rr1_lut16_p3_params);
  }
}
1382 
// Runs the neonfma_rr1_lut16_p3_x20 kernel with inplace(true) for
// batch_size = 1, 20, 39, ..., 96 (stride 19, upper bound 100).
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X20, inplace) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n <= 100; n += 19) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.inplace(true);
    tester.Test(xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x20, xnn_init_f32_elu_neonfma_rr1_lut16_p3_params);
  }
}
1392 
// Runs the neonfma_rr1_lut16_p3_x20 kernel for prescale values 0.1 and 10,
// each over batch_size = 1, 20, 39, ..., 96 (stride 19, upper bound 100).
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X20, prescale) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const std::vector<float> prescale_values = {0.1f, 10.0f};
  for (const float scale : prescale_values) {
    for (size_t n = 1; n <= 100; n += 19) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n);
      tester.prescale(scale);
      tester.Test(xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x20, xnn_init_f32_elu_neonfma_rr1_lut16_p3_params);
    }
  }
}
1404 
// Runs the neonfma_rr1_lut16_p3_x20 kernel for alpha values 0.3 and 3,
// each over batch_size = 1, 20, 39, ..., 96 (stride 19, upper bound 100).
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X20, alpha) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const std::vector<float> alpha_values = {0.3f, 3.0f};
  for (const float alpha_value : alpha_values) {
    for (size_t n = 1; n <= 100; n += 19) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n);
      tester.alpha(alpha_value);
      tester.Test(xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x20, xnn_init_f32_elu_neonfma_rr1_lut16_p3_params);
    }
  }
}
1416 
// Runs the neonfma_rr1_lut16_p3_x20 kernel for beta values 0.3 and 3,
// each over batch_size = 1, 20, 39, ..., 96 (stride 19, upper bound 100).
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X20, beta) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const std::vector<float> beta_values = {0.3f, 3.0f};
  for (const float beta_value : beta_values) {
    for (size_t n = 1; n <= 100; n += 19) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n);
      tester.beta(beta_value);
      tester.Test(xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x20, xnn_init_f32_elu_neonfma_rr1_lut16_p3_params);
    }
  }
}
1428 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
1429 
1430 
1431 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run of the neonfma_rr1_lut16_p3_x24 kernel with batch_size = 24.
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X24, batch_eq_24) {
  TEST_REQUIRES_ARM_NEON_FMA;
  VUnaryMicrokernelTester tester;
  tester.batch_size(24);
  tester.Test(xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x24, xnn_init_f32_elu_neonfma_rr1_lut16_p3_params);
}
1438 
// Runs the neonfma_rr1_lut16_p3_x24 kernel for batch_size = 48, 72, ..., 216
// (multiples of 24 below 240).
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X24, batch_div_24) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 48; n < 240; n += 24) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x24, xnn_init_f32_elu_neonfma_rr1_lut16_p3_params);
  }
}
1447 
// Runs the neonfma_rr1_lut16_p3_x24 kernel once for every batch_size in 1..23.
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X24, batch_lt_24) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n < 24; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x24, xnn_init_f32_elu_neonfma_rr1_lut16_p3_params);
  }
}
1456 
// Runs the neonfma_rr1_lut16_p3_x24 kernel once for every batch_size in 25..47.
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X24, batch_gt_24) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 25; n < 48; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x24, xnn_init_f32_elu_neonfma_rr1_lut16_p3_params);
  }
}
1465 
// Runs the neonfma_rr1_lut16_p3_x24 kernel with inplace(true) for
// batch_size = 1, 24, 47, ..., 116 (stride 23, upper bound 120).
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X24, inplace) {
  TEST_REQUIRES_ARM_NEON_FMA;
  for (size_t n = 1; n <= 120; n += 23) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n);
    tester.inplace(true);
    tester.Test(xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x24, xnn_init_f32_elu_neonfma_rr1_lut16_p3_params);
  }
}
1475 
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X24,prescale)1476   TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X24, prescale) {
1477     TEST_REQUIRES_ARM_NEON_FMA;
1478     for (float prescale : std::vector<float>({0.1f, 10.0f})) {
1479       for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
1480         VUnaryMicrokernelTester()
1481           .batch_size(batch_size)
1482           .prescale(prescale)
1483           .Test(xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x24, xnn_init_f32_elu_neonfma_rr1_lut16_p3_params);
1484       }
1485     }
1486   }
1487 
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X24,alpha)1488   TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X24, alpha) {
1489     TEST_REQUIRES_ARM_NEON_FMA;
1490     for (float alpha : std::vector<float>({0.3f, 3.0f})) {
1491       for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
1492         VUnaryMicrokernelTester()
1493           .batch_size(batch_size)
1494           .alpha(alpha)
1495           .Test(xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x24, xnn_init_f32_elu_neonfma_rr1_lut16_p3_params);
1496       }
1497     }
1498   }
1499 
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X24,beta)1500   TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X24, beta) {
1501     TEST_REQUIRES_ARM_NEON_FMA;
1502     for (float beta : std::vector<float>({0.3f, 3.0f})) {
1503       for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
1504         VUnaryMicrokernelTester()
1505           .batch_size(batch_size)
1506           .beta(beta)
1507           .Test(xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x24, xnn_init_f32_elu_neonfma_rr1_lut16_p3_params);
1508       }
1509     }
1510   }
1511 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
1512 
1513 
1514 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
TEST(F32_VELU__NEONFMA_RR1_P6_X4,batch_eq_4)1515   TEST(F32_VELU__NEONFMA_RR1_P6_X4, batch_eq_4) {
1516     TEST_REQUIRES_ARM_NEON_FMA;
1517     VUnaryMicrokernelTester()
1518       .batch_size(4)
1519       .Test(xnn_f32_velu_ukernel__neonfma_rr1_p6_x4, xnn_init_f32_elu_neonfma_rr1_p6_params);
1520   }
1521 
TEST(F32_VELU__NEONFMA_RR1_P6_X4,batch_div_4)1522   TEST(F32_VELU__NEONFMA_RR1_P6_X4, batch_div_4) {
1523     TEST_REQUIRES_ARM_NEON_FMA;
1524     for (size_t batch_size = 8; batch_size < 40; batch_size += 4) {
1525       VUnaryMicrokernelTester()
1526         .batch_size(batch_size)
1527         .Test(xnn_f32_velu_ukernel__neonfma_rr1_p6_x4, xnn_init_f32_elu_neonfma_rr1_p6_params);
1528     }
1529   }
1530 
TEST(F32_VELU__NEONFMA_RR1_P6_X4,batch_lt_4)1531   TEST(F32_VELU__NEONFMA_RR1_P6_X4, batch_lt_4) {
1532     TEST_REQUIRES_ARM_NEON_FMA;
1533     for (size_t batch_size = 1; batch_size < 4; batch_size++) {
1534       VUnaryMicrokernelTester()
1535         .batch_size(batch_size)
1536         .Test(xnn_f32_velu_ukernel__neonfma_rr1_p6_x4, xnn_init_f32_elu_neonfma_rr1_p6_params);
1537     }
1538   }
1539 
TEST(F32_VELU__NEONFMA_RR1_P6_X4,batch_gt_4)1540   TEST(F32_VELU__NEONFMA_RR1_P6_X4, batch_gt_4) {
1541     TEST_REQUIRES_ARM_NEON_FMA;
1542     for (size_t batch_size = 5; batch_size < 8; batch_size++) {
1543       VUnaryMicrokernelTester()
1544         .batch_size(batch_size)
1545         .Test(xnn_f32_velu_ukernel__neonfma_rr1_p6_x4, xnn_init_f32_elu_neonfma_rr1_p6_params);
1546     }
1547   }
1548 
TEST(F32_VELU__NEONFMA_RR1_P6_X4,inplace)1549   TEST(F32_VELU__NEONFMA_RR1_P6_X4, inplace) {
1550     TEST_REQUIRES_ARM_NEON_FMA;
1551     for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
1552       VUnaryMicrokernelTester()
1553         .batch_size(batch_size)
1554         .inplace(true)
1555         .Test(xnn_f32_velu_ukernel__neonfma_rr1_p6_x4, xnn_init_f32_elu_neonfma_rr1_p6_params);
1556     }
1557   }
1558 
TEST(F32_VELU__NEONFMA_RR1_P6_X4,prescale)1559   TEST(F32_VELU__NEONFMA_RR1_P6_X4, prescale) {
1560     TEST_REQUIRES_ARM_NEON_FMA;
1561     for (float prescale : std::vector<float>({0.1f, 10.0f})) {
1562       for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
1563         VUnaryMicrokernelTester()
1564           .batch_size(batch_size)
1565           .prescale(prescale)
1566           .Test(xnn_f32_velu_ukernel__neonfma_rr1_p6_x4, xnn_init_f32_elu_neonfma_rr1_p6_params);
1567       }
1568     }
1569   }
1570 
TEST(F32_VELU__NEONFMA_RR1_P6_X4,alpha)1571   TEST(F32_VELU__NEONFMA_RR1_P6_X4, alpha) {
1572     TEST_REQUIRES_ARM_NEON_FMA;
1573     for (float alpha : std::vector<float>({0.3f, 3.0f})) {
1574       for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
1575         VUnaryMicrokernelTester()
1576           .batch_size(batch_size)
1577           .alpha(alpha)
1578           .Test(xnn_f32_velu_ukernel__neonfma_rr1_p6_x4, xnn_init_f32_elu_neonfma_rr1_p6_params);
1579       }
1580     }
1581   }
1582 
TEST(F32_VELU__NEONFMA_RR1_P6_X4,beta)1583   TEST(F32_VELU__NEONFMA_RR1_P6_X4, beta) {
1584     TEST_REQUIRES_ARM_NEON_FMA;
1585     for (float beta : std::vector<float>({0.3f, 3.0f})) {
1586       for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
1587         VUnaryMicrokernelTester()
1588           .batch_size(batch_size)
1589           .beta(beta)
1590           .Test(xnn_f32_velu_ukernel__neonfma_rr1_p6_x4, xnn_init_f32_elu_neonfma_rr1_p6_params);
1591       }
1592     }
1593   }
1594 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
1595 
1596 
1597 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
TEST(F32_VELU__NEONFMA_RR1_P6_X8,batch_eq_8)1598   TEST(F32_VELU__NEONFMA_RR1_P6_X8, batch_eq_8) {
1599     TEST_REQUIRES_ARM_NEON_FMA;
1600     VUnaryMicrokernelTester()
1601       .batch_size(8)
1602       .Test(xnn_f32_velu_ukernel__neonfma_rr1_p6_x8, xnn_init_f32_elu_neonfma_rr1_p6_params);
1603   }
1604 
TEST(F32_VELU__NEONFMA_RR1_P6_X8,batch_div_8)1605   TEST(F32_VELU__NEONFMA_RR1_P6_X8, batch_div_8) {
1606     TEST_REQUIRES_ARM_NEON_FMA;
1607     for (size_t batch_size = 16; batch_size < 80; batch_size += 8) {
1608       VUnaryMicrokernelTester()
1609         .batch_size(batch_size)
1610         .Test(xnn_f32_velu_ukernel__neonfma_rr1_p6_x8, xnn_init_f32_elu_neonfma_rr1_p6_params);
1611     }
1612   }
1613 
TEST(F32_VELU__NEONFMA_RR1_P6_X8,batch_lt_8)1614   TEST(F32_VELU__NEONFMA_RR1_P6_X8, batch_lt_8) {
1615     TEST_REQUIRES_ARM_NEON_FMA;
1616     for (size_t batch_size = 1; batch_size < 8; batch_size++) {
1617       VUnaryMicrokernelTester()
1618         .batch_size(batch_size)
1619         .Test(xnn_f32_velu_ukernel__neonfma_rr1_p6_x8, xnn_init_f32_elu_neonfma_rr1_p6_params);
1620     }
1621   }
1622 
TEST(F32_VELU__NEONFMA_RR1_P6_X8,batch_gt_8)1623   TEST(F32_VELU__NEONFMA_RR1_P6_X8, batch_gt_8) {
1624     TEST_REQUIRES_ARM_NEON_FMA;
1625     for (size_t batch_size = 9; batch_size < 16; batch_size++) {
1626       VUnaryMicrokernelTester()
1627         .batch_size(batch_size)
1628         .Test(xnn_f32_velu_ukernel__neonfma_rr1_p6_x8, xnn_init_f32_elu_neonfma_rr1_p6_params);
1629     }
1630   }
1631 
TEST(F32_VELU__NEONFMA_RR1_P6_X8,inplace)1632   TEST(F32_VELU__NEONFMA_RR1_P6_X8, inplace) {
1633     TEST_REQUIRES_ARM_NEON_FMA;
1634     for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1635       VUnaryMicrokernelTester()
1636         .batch_size(batch_size)
1637         .inplace(true)
1638         .Test(xnn_f32_velu_ukernel__neonfma_rr1_p6_x8, xnn_init_f32_elu_neonfma_rr1_p6_params);
1639     }
1640   }
1641 
TEST(F32_VELU__NEONFMA_RR1_P6_X8,prescale)1642   TEST(F32_VELU__NEONFMA_RR1_P6_X8, prescale) {
1643     TEST_REQUIRES_ARM_NEON_FMA;
1644     for (float prescale : std::vector<float>({0.1f, 10.0f})) {
1645       for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1646         VUnaryMicrokernelTester()
1647           .batch_size(batch_size)
1648           .prescale(prescale)
1649           .Test(xnn_f32_velu_ukernel__neonfma_rr1_p6_x8, xnn_init_f32_elu_neonfma_rr1_p6_params);
1650       }
1651     }
1652   }
1653 
TEST(F32_VELU__NEONFMA_RR1_P6_X8,alpha)1654   TEST(F32_VELU__NEONFMA_RR1_P6_X8, alpha) {
1655     TEST_REQUIRES_ARM_NEON_FMA;
1656     for (float alpha : std::vector<float>({0.3f, 3.0f})) {
1657       for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1658         VUnaryMicrokernelTester()
1659           .batch_size(batch_size)
1660           .alpha(alpha)
1661           .Test(xnn_f32_velu_ukernel__neonfma_rr1_p6_x8, xnn_init_f32_elu_neonfma_rr1_p6_params);
1662       }
1663     }
1664   }
1665 
TEST(F32_VELU__NEONFMA_RR1_P6_X8,beta)1666   TEST(F32_VELU__NEONFMA_RR1_P6_X8, beta) {
1667     TEST_REQUIRES_ARM_NEON_FMA;
1668     for (float beta : std::vector<float>({0.3f, 3.0f})) {
1669       for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
1670         VUnaryMicrokernelTester()
1671           .batch_size(batch_size)
1672           .beta(beta)
1673           .Test(xnn_f32_velu_ukernel__neonfma_rr1_p6_x8, xnn_init_f32_elu_neonfma_rr1_p6_params);
1674       }
1675     }
1676   }
1677 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
1678 
1679 
1680 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
TEST(F32_VELU__NEONFMA_RR1_P6_X12,batch_eq_12)1681   TEST(F32_VELU__NEONFMA_RR1_P6_X12, batch_eq_12) {
1682     TEST_REQUIRES_ARM_NEON_FMA;
1683     VUnaryMicrokernelTester()
1684       .batch_size(12)
1685       .Test(xnn_f32_velu_ukernel__neonfma_rr1_p6_x12, xnn_init_f32_elu_neonfma_rr1_p6_params);
1686   }
1687 
TEST(F32_VELU__NEONFMA_RR1_P6_X12,batch_div_12)1688   TEST(F32_VELU__NEONFMA_RR1_P6_X12, batch_div_12) {
1689     TEST_REQUIRES_ARM_NEON_FMA;
1690     for (size_t batch_size = 24; batch_size < 120; batch_size += 12) {
1691       VUnaryMicrokernelTester()
1692         .batch_size(batch_size)
1693         .Test(xnn_f32_velu_ukernel__neonfma_rr1_p6_x12, xnn_init_f32_elu_neonfma_rr1_p6_params);
1694     }
1695   }
1696 
TEST(F32_VELU__NEONFMA_RR1_P6_X12,batch_lt_12)1697   TEST(F32_VELU__NEONFMA_RR1_P6_X12, batch_lt_12) {
1698     TEST_REQUIRES_ARM_NEON_FMA;
1699     for (size_t batch_size = 1; batch_size < 12; batch_size++) {
1700       VUnaryMicrokernelTester()
1701         .batch_size(batch_size)
1702         .Test(xnn_f32_velu_ukernel__neonfma_rr1_p6_x12, xnn_init_f32_elu_neonfma_rr1_p6_params);
1703     }
1704   }
1705 
TEST(F32_VELU__NEONFMA_RR1_P6_X12,batch_gt_12)1706   TEST(F32_VELU__NEONFMA_RR1_P6_X12, batch_gt_12) {
1707     TEST_REQUIRES_ARM_NEON_FMA;
1708     for (size_t batch_size = 13; batch_size < 24; batch_size++) {
1709       VUnaryMicrokernelTester()
1710         .batch_size(batch_size)
1711         .Test(xnn_f32_velu_ukernel__neonfma_rr1_p6_x12, xnn_init_f32_elu_neonfma_rr1_p6_params);
1712     }
1713   }
1714 
TEST(F32_VELU__NEONFMA_RR1_P6_X12,inplace)1715   TEST(F32_VELU__NEONFMA_RR1_P6_X12, inplace) {
1716     TEST_REQUIRES_ARM_NEON_FMA;
1717     for (size_t batch_size = 1; batch_size <= 60; batch_size += 11) {
1718       VUnaryMicrokernelTester()
1719         .batch_size(batch_size)
1720         .inplace(true)
1721         .Test(xnn_f32_velu_ukernel__neonfma_rr1_p6_x12, xnn_init_f32_elu_neonfma_rr1_p6_params);
1722     }
1723   }
1724 
TEST(F32_VELU__NEONFMA_RR1_P6_X12,prescale)1725   TEST(F32_VELU__NEONFMA_RR1_P6_X12, prescale) {
1726     TEST_REQUIRES_ARM_NEON_FMA;
1727     for (float prescale : std::vector<float>({0.1f, 10.0f})) {
1728       for (size_t batch_size = 1; batch_size <= 60; batch_size += 11) {
1729         VUnaryMicrokernelTester()
1730           .batch_size(batch_size)
1731           .prescale(prescale)
1732           .Test(xnn_f32_velu_ukernel__neonfma_rr1_p6_x12, xnn_init_f32_elu_neonfma_rr1_p6_params);
1733       }
1734     }
1735   }
1736 
TEST(F32_VELU__NEONFMA_RR1_P6_X12,alpha)1737   TEST(F32_VELU__NEONFMA_RR1_P6_X12, alpha) {
1738     TEST_REQUIRES_ARM_NEON_FMA;
1739     for (float alpha : std::vector<float>({0.3f, 3.0f})) {
1740       for (size_t batch_size = 1; batch_size <= 60; batch_size += 11) {
1741         VUnaryMicrokernelTester()
1742           .batch_size(batch_size)
1743           .alpha(alpha)
1744           .Test(xnn_f32_velu_ukernel__neonfma_rr1_p6_x12, xnn_init_f32_elu_neonfma_rr1_p6_params);
1745       }
1746     }
1747   }
1748 
TEST(F32_VELU__NEONFMA_RR1_P6_X12,beta)1749   TEST(F32_VELU__NEONFMA_RR1_P6_X12, beta) {
1750     TEST_REQUIRES_ARM_NEON_FMA;
1751     for (float beta : std::vector<float>({0.3f, 3.0f})) {
1752       for (size_t batch_size = 1; batch_size <= 60; batch_size += 11) {
1753         VUnaryMicrokernelTester()
1754           .batch_size(batch_size)
1755           .beta(beta)
1756           .Test(xnn_f32_velu_ukernel__neonfma_rr1_p6_x12, xnn_init_f32_elu_neonfma_rr1_p6_params);
1757       }
1758     }
1759   }
1760 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
1761 
1762 
1763 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
TEST(F32_VELU__NEONFMA_RR1_P6_X16,batch_eq_16)1764   TEST(F32_VELU__NEONFMA_RR1_P6_X16, batch_eq_16) {
1765     TEST_REQUIRES_ARM_NEON_FMA;
1766     VUnaryMicrokernelTester()
1767       .batch_size(16)
1768       .Test(xnn_f32_velu_ukernel__neonfma_rr1_p6_x16, xnn_init_f32_elu_neonfma_rr1_p6_params);
1769   }
1770 
TEST(F32_VELU__NEONFMA_RR1_P6_X16,batch_div_16)1771   TEST(F32_VELU__NEONFMA_RR1_P6_X16, batch_div_16) {
1772     TEST_REQUIRES_ARM_NEON_FMA;
1773     for (size_t batch_size = 32; batch_size < 160; batch_size += 16) {
1774       VUnaryMicrokernelTester()
1775         .batch_size(batch_size)
1776         .Test(xnn_f32_velu_ukernel__neonfma_rr1_p6_x16, xnn_init_f32_elu_neonfma_rr1_p6_params);
1777     }
1778   }
1779 
TEST(F32_VELU__NEONFMA_RR1_P6_X16,batch_lt_16)1780   TEST(F32_VELU__NEONFMA_RR1_P6_X16, batch_lt_16) {
1781     TEST_REQUIRES_ARM_NEON_FMA;
1782     for (size_t batch_size = 1; batch_size < 16; batch_size++) {
1783       VUnaryMicrokernelTester()
1784         .batch_size(batch_size)
1785         .Test(xnn_f32_velu_ukernel__neonfma_rr1_p6_x16, xnn_init_f32_elu_neonfma_rr1_p6_params);
1786     }
1787   }
1788 
TEST(F32_VELU__NEONFMA_RR1_P6_X16,batch_gt_16)1789   TEST(F32_VELU__NEONFMA_RR1_P6_X16, batch_gt_16) {
1790     TEST_REQUIRES_ARM_NEON_FMA;
1791     for (size_t batch_size = 17; batch_size < 32; batch_size++) {
1792       VUnaryMicrokernelTester()
1793         .batch_size(batch_size)
1794         .Test(xnn_f32_velu_ukernel__neonfma_rr1_p6_x16, xnn_init_f32_elu_neonfma_rr1_p6_params);
1795     }
1796   }
1797 
TEST(F32_VELU__NEONFMA_RR1_P6_X16,inplace)1798   TEST(F32_VELU__NEONFMA_RR1_P6_X16, inplace) {
1799     TEST_REQUIRES_ARM_NEON_FMA;
1800     for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1801       VUnaryMicrokernelTester()
1802         .batch_size(batch_size)
1803         .inplace(true)
1804         .Test(xnn_f32_velu_ukernel__neonfma_rr1_p6_x16, xnn_init_f32_elu_neonfma_rr1_p6_params);
1805     }
1806   }
1807 
TEST(F32_VELU__NEONFMA_RR1_P6_X16,prescale)1808   TEST(F32_VELU__NEONFMA_RR1_P6_X16, prescale) {
1809     TEST_REQUIRES_ARM_NEON_FMA;
1810     for (float prescale : std::vector<float>({0.1f, 10.0f})) {
1811       for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1812         VUnaryMicrokernelTester()
1813           .batch_size(batch_size)
1814           .prescale(prescale)
1815           .Test(xnn_f32_velu_ukernel__neonfma_rr1_p6_x16, xnn_init_f32_elu_neonfma_rr1_p6_params);
1816       }
1817     }
1818   }
1819 
TEST(F32_VELU__NEONFMA_RR1_P6_X16,alpha)1820   TEST(F32_VELU__NEONFMA_RR1_P6_X16, alpha) {
1821     TEST_REQUIRES_ARM_NEON_FMA;
1822     for (float alpha : std::vector<float>({0.3f, 3.0f})) {
1823       for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1824         VUnaryMicrokernelTester()
1825           .batch_size(batch_size)
1826           .alpha(alpha)
1827           .Test(xnn_f32_velu_ukernel__neonfma_rr1_p6_x16, xnn_init_f32_elu_neonfma_rr1_p6_params);
1828       }
1829     }
1830   }
1831 
TEST(F32_VELU__NEONFMA_RR1_P6_X16,beta)1832   TEST(F32_VELU__NEONFMA_RR1_P6_X16, beta) {
1833     TEST_REQUIRES_ARM_NEON_FMA;
1834     for (float beta : std::vector<float>({0.3f, 3.0f})) {
1835       for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
1836         VUnaryMicrokernelTester()
1837           .batch_size(batch_size)
1838           .beta(beta)
1839           .Test(xnn_f32_velu_ukernel__neonfma_rr1_p6_x16, xnn_init_f32_elu_neonfma_rr1_p6_params);
1840       }
1841     }
1842   }
1843 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
1844 
1845 
1846 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
TEST(F32_VELU__NEONFMA_RR1_P6_X20,batch_eq_20)1847   TEST(F32_VELU__NEONFMA_RR1_P6_X20, batch_eq_20) {
1848     TEST_REQUIRES_ARM_NEON_FMA;
1849     VUnaryMicrokernelTester()
1850       .batch_size(20)
1851       .Test(xnn_f32_velu_ukernel__neonfma_rr1_p6_x20, xnn_init_f32_elu_neonfma_rr1_p6_params);
1852   }
1853 
TEST(F32_VELU__NEONFMA_RR1_P6_X20,batch_div_20)1854   TEST(F32_VELU__NEONFMA_RR1_P6_X20, batch_div_20) {
1855     TEST_REQUIRES_ARM_NEON_FMA;
1856     for (size_t batch_size = 40; batch_size < 200; batch_size += 20) {
1857       VUnaryMicrokernelTester()
1858         .batch_size(batch_size)
1859         .Test(xnn_f32_velu_ukernel__neonfma_rr1_p6_x20, xnn_init_f32_elu_neonfma_rr1_p6_params);
1860     }
1861   }
1862 
TEST(F32_VELU__NEONFMA_RR1_P6_X20,batch_lt_20)1863   TEST(F32_VELU__NEONFMA_RR1_P6_X20, batch_lt_20) {
1864     TEST_REQUIRES_ARM_NEON_FMA;
1865     for (size_t batch_size = 1; batch_size < 20; batch_size++) {
1866       VUnaryMicrokernelTester()
1867         .batch_size(batch_size)
1868         .Test(xnn_f32_velu_ukernel__neonfma_rr1_p6_x20, xnn_init_f32_elu_neonfma_rr1_p6_params);
1869     }
1870   }
1871 
TEST(F32_VELU__NEONFMA_RR1_P6_X20,batch_gt_20)1872   TEST(F32_VELU__NEONFMA_RR1_P6_X20, batch_gt_20) {
1873     TEST_REQUIRES_ARM_NEON_FMA;
1874     for (size_t batch_size = 21; batch_size < 40; batch_size++) {
1875       VUnaryMicrokernelTester()
1876         .batch_size(batch_size)
1877         .Test(xnn_f32_velu_ukernel__neonfma_rr1_p6_x20, xnn_init_f32_elu_neonfma_rr1_p6_params);
1878     }
1879   }
1880 
TEST(F32_VELU__NEONFMA_RR1_P6_X20,inplace)1881   TEST(F32_VELU__NEONFMA_RR1_P6_X20, inplace) {
1882     TEST_REQUIRES_ARM_NEON_FMA;
1883     for (size_t batch_size = 1; batch_size <= 100; batch_size += 19) {
1884       VUnaryMicrokernelTester()
1885         .batch_size(batch_size)
1886         .inplace(true)
1887         .Test(xnn_f32_velu_ukernel__neonfma_rr1_p6_x20, xnn_init_f32_elu_neonfma_rr1_p6_params);
1888     }
1889   }
1890 
TEST(F32_VELU__NEONFMA_RR1_P6_X20,prescale)1891   TEST(F32_VELU__NEONFMA_RR1_P6_X20, prescale) {
1892     TEST_REQUIRES_ARM_NEON_FMA;
1893     for (float prescale : std::vector<float>({0.1f, 10.0f})) {
1894       for (size_t batch_size = 1; batch_size <= 100; batch_size += 19) {
1895         VUnaryMicrokernelTester()
1896           .batch_size(batch_size)
1897           .prescale(prescale)
1898           .Test(xnn_f32_velu_ukernel__neonfma_rr1_p6_x20, xnn_init_f32_elu_neonfma_rr1_p6_params);
1899       }
1900     }
1901   }
1902 
TEST(F32_VELU__NEONFMA_RR1_P6_X20,alpha)1903   TEST(F32_VELU__NEONFMA_RR1_P6_X20, alpha) {
1904     TEST_REQUIRES_ARM_NEON_FMA;
1905     for (float alpha : std::vector<float>({0.3f, 3.0f})) {
1906       for (size_t batch_size = 1; batch_size <= 100; batch_size += 19) {
1907         VUnaryMicrokernelTester()
1908           .batch_size(batch_size)
1909           .alpha(alpha)
1910           .Test(xnn_f32_velu_ukernel__neonfma_rr1_p6_x20, xnn_init_f32_elu_neonfma_rr1_p6_params);
1911       }
1912     }
1913   }
1914 
TEST(F32_VELU__NEONFMA_RR1_P6_X20,beta)1915   TEST(F32_VELU__NEONFMA_RR1_P6_X20, beta) {
1916     TEST_REQUIRES_ARM_NEON_FMA;
1917     for (float beta : std::vector<float>({0.3f, 3.0f})) {
1918       for (size_t batch_size = 1; batch_size <= 100; batch_size += 19) {
1919         VUnaryMicrokernelTester()
1920           .batch_size(batch_size)
1921           .beta(beta)
1922           .Test(xnn_f32_velu_ukernel__neonfma_rr1_p6_x20, xnn_init_f32_elu_neonfma_rr1_p6_params);
1923       }
1924     }
1925   }
1926 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
1927 
1928 
1929 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
TEST(F32_VELU__NEONFMA_RR1_P6_X24,batch_eq_24)1930   TEST(F32_VELU__NEONFMA_RR1_P6_X24, batch_eq_24) {
1931     TEST_REQUIRES_ARM_NEON_FMA;
1932     VUnaryMicrokernelTester()
1933       .batch_size(24)
1934       .Test(xnn_f32_velu_ukernel__neonfma_rr1_p6_x24, xnn_init_f32_elu_neonfma_rr1_p6_params);
1935   }
1936 
TEST(F32_VELU__NEONFMA_RR1_P6_X24,batch_div_24)1937   TEST(F32_VELU__NEONFMA_RR1_P6_X24, batch_div_24) {
1938     TEST_REQUIRES_ARM_NEON_FMA;
1939     for (size_t batch_size = 48; batch_size < 240; batch_size += 24) {
1940       VUnaryMicrokernelTester()
1941         .batch_size(batch_size)
1942         .Test(xnn_f32_velu_ukernel__neonfma_rr1_p6_x24, xnn_init_f32_elu_neonfma_rr1_p6_params);
1943     }
1944   }
1945 
TEST(F32_VELU__NEONFMA_RR1_P6_X24,batch_lt_24)1946   TEST(F32_VELU__NEONFMA_RR1_P6_X24, batch_lt_24) {
1947     TEST_REQUIRES_ARM_NEON_FMA;
1948     for (size_t batch_size = 1; batch_size < 24; batch_size++) {
1949       VUnaryMicrokernelTester()
1950         .batch_size(batch_size)
1951         .Test(xnn_f32_velu_ukernel__neonfma_rr1_p6_x24, xnn_init_f32_elu_neonfma_rr1_p6_params);
1952     }
1953   }
1954 
TEST(F32_VELU__NEONFMA_RR1_P6_X24,batch_gt_24)1955   TEST(F32_VELU__NEONFMA_RR1_P6_X24, batch_gt_24) {
1956     TEST_REQUIRES_ARM_NEON_FMA;
1957     for (size_t batch_size = 25; batch_size < 48; batch_size++) {
1958       VUnaryMicrokernelTester()
1959         .batch_size(batch_size)
1960         .Test(xnn_f32_velu_ukernel__neonfma_rr1_p6_x24, xnn_init_f32_elu_neonfma_rr1_p6_params);
1961     }
1962   }
1963 
TEST(F32_VELU__NEONFMA_RR1_P6_X24,inplace)1964   TEST(F32_VELU__NEONFMA_RR1_P6_X24, inplace) {
1965     TEST_REQUIRES_ARM_NEON_FMA;
1966     for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
1967       VUnaryMicrokernelTester()
1968         .batch_size(batch_size)
1969         .inplace(true)
1970         .Test(xnn_f32_velu_ukernel__neonfma_rr1_p6_x24, xnn_init_f32_elu_neonfma_rr1_p6_params);
1971     }
1972   }
1973 
TEST(F32_VELU__NEONFMA_RR1_P6_X24,prescale)1974   TEST(F32_VELU__NEONFMA_RR1_P6_X24, prescale) {
1975     TEST_REQUIRES_ARM_NEON_FMA;
1976     for (float prescale : std::vector<float>({0.1f, 10.0f})) {
1977       for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
1978         VUnaryMicrokernelTester()
1979           .batch_size(batch_size)
1980           .prescale(prescale)
1981           .Test(xnn_f32_velu_ukernel__neonfma_rr1_p6_x24, xnn_init_f32_elu_neonfma_rr1_p6_params);
1982       }
1983     }
1984   }
1985 
TEST(F32_VELU__NEONFMA_RR1_P6_X24,alpha)1986   TEST(F32_VELU__NEONFMA_RR1_P6_X24, alpha) {
1987     TEST_REQUIRES_ARM_NEON_FMA;
1988     for (float alpha : std::vector<float>({0.3f, 3.0f})) {
1989       for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
1990         VUnaryMicrokernelTester()
1991           .batch_size(batch_size)
1992           .alpha(alpha)
1993           .Test(xnn_f32_velu_ukernel__neonfma_rr1_p6_x24, xnn_init_f32_elu_neonfma_rr1_p6_params);
1994       }
1995     }
1996   }
1997 
TEST(F32_VELU__NEONFMA_RR1_P6_X24,beta)1998   TEST(F32_VELU__NEONFMA_RR1_P6_X24, beta) {
1999     TEST_REQUIRES_ARM_NEON_FMA;
2000     for (float beta : std::vector<float>({0.3f, 3.0f})) {
2001       for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
2002         VUnaryMicrokernelTester()
2003           .batch_size(batch_size)
2004           .beta(beta)
2005           .Test(xnn_f32_velu_ukernel__neonfma_rr1_p6_x24, xnn_init_f32_elu_neonfma_rr1_p6_params);
2006       }
2007     }
2008   }
2009 #endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
2010 
2011 
2012 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(F32_VELU__SSE2_RR2_LUT16_P3_X4,batch_eq_4)2013   TEST(F32_VELU__SSE2_RR2_LUT16_P3_X4, batch_eq_4) {
2014     TEST_REQUIRES_X86_SSE2;
2015     VUnaryMicrokernelTester()
2016       .batch_size(4)
2017       .Test(xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x4, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
2018   }
2019 
TEST(F32_VELU__SSE2_RR2_LUT16_P3_X4,batch_div_4)2020   TEST(F32_VELU__SSE2_RR2_LUT16_P3_X4, batch_div_4) {
2021     TEST_REQUIRES_X86_SSE2;
2022     for (size_t batch_size = 8; batch_size < 40; batch_size += 4) {
2023       VUnaryMicrokernelTester()
2024         .batch_size(batch_size)
2025         .Test(xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x4, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
2026     }
2027   }
2028 
TEST(F32_VELU__SSE2_RR2_LUT16_P3_X4,batch_lt_4)2029   TEST(F32_VELU__SSE2_RR2_LUT16_P3_X4, batch_lt_4) {
2030     TEST_REQUIRES_X86_SSE2;
2031     for (size_t batch_size = 1; batch_size < 4; batch_size++) {
2032       VUnaryMicrokernelTester()
2033         .batch_size(batch_size)
2034         .Test(xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x4, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
2035     }
2036   }
2037 
TEST(F32_VELU__SSE2_RR2_LUT16_P3_X4,batch_gt_4)2038   TEST(F32_VELU__SSE2_RR2_LUT16_P3_X4, batch_gt_4) {
2039     TEST_REQUIRES_X86_SSE2;
2040     for (size_t batch_size = 5; batch_size < 8; batch_size++) {
2041       VUnaryMicrokernelTester()
2042         .batch_size(batch_size)
2043         .Test(xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x4, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
2044     }
2045   }
2046 
TEST(F32_VELU__SSE2_RR2_LUT16_P3_X4,inplace)2047   TEST(F32_VELU__SSE2_RR2_LUT16_P3_X4, inplace) {
2048     TEST_REQUIRES_X86_SSE2;
2049     for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
2050       VUnaryMicrokernelTester()
2051         .batch_size(batch_size)
2052         .inplace(true)
2053         .Test(xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x4, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
2054     }
2055   }
2056 
TEST(F32_VELU__SSE2_RR2_LUT16_P3_X4,prescale)2057   TEST(F32_VELU__SSE2_RR2_LUT16_P3_X4, prescale) {
2058     TEST_REQUIRES_X86_SSE2;
2059     for (float prescale : std::vector<float>({0.1f, 10.0f})) {
2060       for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
2061         VUnaryMicrokernelTester()
2062           .batch_size(batch_size)
2063           .prescale(prescale)
2064           .Test(xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x4, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
2065       }
2066     }
2067   }
2068 
TEST(F32_VELU__SSE2_RR2_LUT16_P3_X4,alpha)2069   TEST(F32_VELU__SSE2_RR2_LUT16_P3_X4, alpha) {
2070     TEST_REQUIRES_X86_SSE2;
2071     for (float alpha : std::vector<float>({0.3f, 3.0f})) {
2072       for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
2073         VUnaryMicrokernelTester()
2074           .batch_size(batch_size)
2075           .alpha(alpha)
2076           .Test(xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x4, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
2077       }
2078     }
2079   }
2080 
TEST(F32_VELU__SSE2_RR2_LUT16_P3_X4,beta)2081   TEST(F32_VELU__SSE2_RR2_LUT16_P3_X4, beta) {
2082     TEST_REQUIRES_X86_SSE2;
2083     for (float beta : std::vector<float>({0.3f, 3.0f})) {
2084       for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
2085         VUnaryMicrokernelTester()
2086           .batch_size(batch_size)
2087           .beta(beta)
2088           .Test(xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x4, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
2089       }
2090     }
2091   }
2092 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
2093 
2094 
2095 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(F32_VELU__SSE2_RR2_LUT16_P3_X8,batch_eq_8)2096   TEST(F32_VELU__SSE2_RR2_LUT16_P3_X8, batch_eq_8) {
2097     TEST_REQUIRES_X86_SSE2;
2098     VUnaryMicrokernelTester()
2099       .batch_size(8)
2100       .Test(xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x8, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
2101   }
2102 
TEST(F32_VELU__SSE2_RR2_LUT16_P3_X8,batch_div_8)2103   TEST(F32_VELU__SSE2_RR2_LUT16_P3_X8, batch_div_8) {
2104     TEST_REQUIRES_X86_SSE2;
2105     for (size_t batch_size = 16; batch_size < 80; batch_size += 8) {
2106       VUnaryMicrokernelTester()
2107         .batch_size(batch_size)
2108         .Test(xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x8, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
2109     }
2110   }
2111 
TEST(F32_VELU__SSE2_RR2_LUT16_P3_X8,batch_lt_8)2112   TEST(F32_VELU__SSE2_RR2_LUT16_P3_X8, batch_lt_8) {
2113     TEST_REQUIRES_X86_SSE2;
2114     for (size_t batch_size = 1; batch_size < 8; batch_size++) {
2115       VUnaryMicrokernelTester()
2116         .batch_size(batch_size)
2117         .Test(xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x8, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
2118     }
2119   }
2120 
TEST(F32_VELU__SSE2_RR2_LUT16_P3_X8,batch_gt_8)2121   TEST(F32_VELU__SSE2_RR2_LUT16_P3_X8, batch_gt_8) {
2122     TEST_REQUIRES_X86_SSE2;
2123     for (size_t batch_size = 9; batch_size < 16; batch_size++) {
2124       VUnaryMicrokernelTester()
2125         .batch_size(batch_size)
2126         .Test(xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x8, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
2127     }
2128   }
2129 
TEST(F32_VELU__SSE2_RR2_LUT16_P3_X8,inplace)2130   TEST(F32_VELU__SSE2_RR2_LUT16_P3_X8, inplace) {
2131     TEST_REQUIRES_X86_SSE2;
2132     for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
2133       VUnaryMicrokernelTester()
2134         .batch_size(batch_size)
2135         .inplace(true)
2136         .Test(xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x8, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
2137     }
2138   }
2139 
TEST(F32_VELU__SSE2_RR2_LUT16_P3_X8,prescale)2140   TEST(F32_VELU__SSE2_RR2_LUT16_P3_X8, prescale) {
2141     TEST_REQUIRES_X86_SSE2;
2142     for (float prescale : std::vector<float>({0.1f, 10.0f})) {
2143       for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
2144         VUnaryMicrokernelTester()
2145           .batch_size(batch_size)
2146           .prescale(prescale)
2147           .Test(xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x8, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
2148       }
2149     }
2150   }
2151 
TEST(F32_VELU__SSE2_RR2_LUT16_P3_X8,alpha)2152   TEST(F32_VELU__SSE2_RR2_LUT16_P3_X8, alpha) {
2153     TEST_REQUIRES_X86_SSE2;
2154     for (float alpha : std::vector<float>({0.3f, 3.0f})) {
2155       for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
2156         VUnaryMicrokernelTester()
2157           .batch_size(batch_size)
2158           .alpha(alpha)
2159           .Test(xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x8, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
2160       }
2161     }
2162   }
2163 
TEST(F32_VELU__SSE2_RR2_LUT16_P3_X8,beta)2164   TEST(F32_VELU__SSE2_RR2_LUT16_P3_X8, beta) {
2165     TEST_REQUIRES_X86_SSE2;
2166     for (float beta : std::vector<float>({0.3f, 3.0f})) {
2167       for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
2168         VUnaryMicrokernelTester()
2169           .batch_size(batch_size)
2170           .beta(beta)
2171           .Test(xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x8, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
2172       }
2173     }
2174   }
2175 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
2176 
2177 
2178 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(F32_VELU__SSE2_RR2_LUT16_P3_X12,batch_eq_12)2179   TEST(F32_VELU__SSE2_RR2_LUT16_P3_X12, batch_eq_12) {
2180     TEST_REQUIRES_X86_SSE2;
2181     VUnaryMicrokernelTester()
2182       .batch_size(12)
2183       .Test(xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x12, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
2184   }
2185 
TEST(F32_VELU__SSE2_RR2_LUT16_P3_X12,batch_div_12)2186   TEST(F32_VELU__SSE2_RR2_LUT16_P3_X12, batch_div_12) {
2187     TEST_REQUIRES_X86_SSE2;
2188     for (size_t batch_size = 24; batch_size < 120; batch_size += 12) {
2189       VUnaryMicrokernelTester()
2190         .batch_size(batch_size)
2191         .Test(xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x12, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
2192     }
2193   }
2194 
// Sweeps every batch size below 12 (1 through 11).
TEST(F32_VELU__SSE2_RR2_LUT16_P3_X12, batch_lt_12) {
  TEST_REQUIRES_X86_SSE2;
  constexpr size_t kTile = 12;
  for (size_t n = 1; n < kTile; ++n) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .Test(xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x12, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
  }
}
2203 
// Sweeps every batch size strictly between 12 and 24 (13 through 23).
TEST(F32_VELU__SSE2_RR2_LUT16_P3_X12, batch_gt_12) {
  TEST_REQUIRES_X86_SSE2;
  constexpr size_t kTile = 12;
  for (size_t n = kTile + 1; n < 2 * kTile; ++n) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .Test(xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x12, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
  }
}
2212 
// Runs the kernel with inplace(true) over batch sizes 1, 12, 23, ..., 56.
TEST(F32_VELU__SSE2_RR2_LUT16_P3_X12, inplace) {
  TEST_REQUIRES_X86_SSE2;
  constexpr size_t kTile = 12;
  for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .inplace(true)
      .Test(xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x12, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
  }
}
2222 
// Runs the kernel with prescale = 0.1 and 10.0 over batch sizes
// 1, 12, 23, ..., 56.
TEST(F32_VELU__SSE2_RR2_LUT16_P3_X12, prescale) {
  TEST_REQUIRES_X86_SSE2;
  constexpr size_t kTile = 12;
  const float kPrescaleValues[] = {0.1f, 10.0f};
  for (const float prescale_value : kPrescaleValues) {
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .prescale(prescale_value)
        .Test(xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x12, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
    }
  }
}
2234 
// Runs the kernel with alpha = 0.3 and 3.0 over batch sizes 1, 12, 23, ..., 56.
TEST(F32_VELU__SSE2_RR2_LUT16_P3_X12, alpha) {
  TEST_REQUIRES_X86_SSE2;
  constexpr size_t kTile = 12;
  const float kAlphaValues[] = {0.3f, 3.0f};
  for (const float alpha_value : kAlphaValues) {
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .alpha(alpha_value)
        .Test(xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x12, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
    }
  }
}
2246 
// Runs the kernel with beta = 0.3 and 3.0 over batch sizes 1, 12, 23, ..., 56.
TEST(F32_VELU__SSE2_RR2_LUT16_P3_X12, beta) {
  TEST_REQUIRES_X86_SSE2;
  constexpr size_t kTile = 12;
  const float kBetaValues[] = {0.3f, 3.0f};
  for (const float beta_value : kBetaValues) {
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .beta(beta_value)
        .Test(xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x12, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
    }
  }
}
2258 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
2259 
2260 
2261 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the x16 LUT16/P3 SSE2 ELU kernel once with a batch size of exactly 16.
TEST(F32_VELU__SSE2_RR2_LUT16_P3_X16, batch_eq_16) {
  TEST_REQUIRES_X86_SSE2;
  constexpr size_t kTile = 16;
  VUnaryMicrokernelTester()
    .batch_size(kTile)
    .Test(xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x16, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
}
2268 
// Sweeps batch sizes that are multiples of 16: 32, 48, ..., 144.
TEST(F32_VELU__SSE2_RR2_LUT16_P3_X16, batch_div_16) {
  TEST_REQUIRES_X86_SSE2;
  constexpr size_t kTile = 16;
  for (size_t n = 2 * kTile; n < 10 * kTile; n += kTile) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .Test(xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x16, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
  }
}
2277 
// Sweeps every batch size below 16 (1 through 15).
TEST(F32_VELU__SSE2_RR2_LUT16_P3_X16, batch_lt_16) {
  TEST_REQUIRES_X86_SSE2;
  constexpr size_t kTile = 16;
  for (size_t n = 1; n < kTile; ++n) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .Test(xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x16, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
  }
}
2286 
// Sweeps every batch size strictly between 16 and 32 (17 through 31).
TEST(F32_VELU__SSE2_RR2_LUT16_P3_X16, batch_gt_16) {
  TEST_REQUIRES_X86_SSE2;
  constexpr size_t kTile = 16;
  for (size_t n = kTile + 1; n < 2 * kTile; ++n) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .Test(xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x16, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
  }
}
2295 
// Runs the kernel with inplace(true) over batch sizes 1, 16, 31, ..., 76.
TEST(F32_VELU__SSE2_RR2_LUT16_P3_X16, inplace) {
  TEST_REQUIRES_X86_SSE2;
  constexpr size_t kTile = 16;
  for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .inplace(true)
      .Test(xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x16, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
  }
}
2305 
// Runs the kernel with prescale = 0.1 and 10.0 over batch sizes
// 1, 16, 31, ..., 76.
TEST(F32_VELU__SSE2_RR2_LUT16_P3_X16, prescale) {
  TEST_REQUIRES_X86_SSE2;
  constexpr size_t kTile = 16;
  const float kPrescaleValues[] = {0.1f, 10.0f};
  for (const float prescale_value : kPrescaleValues) {
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .prescale(prescale_value)
        .Test(xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x16, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
    }
  }
}
2317 
// Runs the kernel with alpha = 0.3 and 3.0 over batch sizes 1, 16, 31, ..., 76.
TEST(F32_VELU__SSE2_RR2_LUT16_P3_X16, alpha) {
  TEST_REQUIRES_X86_SSE2;
  constexpr size_t kTile = 16;
  const float kAlphaValues[] = {0.3f, 3.0f};
  for (const float alpha_value : kAlphaValues) {
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .alpha(alpha_value)
        .Test(xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x16, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
    }
  }
}
2329 
// Runs the kernel with beta = 0.3 and 3.0 over batch sizes 1, 16, 31, ..., 76.
TEST(F32_VELU__SSE2_RR2_LUT16_P3_X16, beta) {
  TEST_REQUIRES_X86_SSE2;
  constexpr size_t kTile = 16;
  const float kBetaValues[] = {0.3f, 3.0f};
  for (const float beta_value : kBetaValues) {
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .beta(beta_value)
        .Test(xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x16, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
    }
  }
}
2341 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
2342 
2343 
2344 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the x20 LUT16/P3 SSE2 ELU kernel once with a batch size of exactly 20.
TEST(F32_VELU__SSE2_RR2_LUT16_P3_X20, batch_eq_20) {
  TEST_REQUIRES_X86_SSE2;
  constexpr size_t kTile = 20;
  VUnaryMicrokernelTester()
    .batch_size(kTile)
    .Test(xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x20, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
}
2351 
// Sweeps batch sizes that are multiples of 20: 40, 60, ..., 180.
TEST(F32_VELU__SSE2_RR2_LUT16_P3_X20, batch_div_20) {
  TEST_REQUIRES_X86_SSE2;
  constexpr size_t kTile = 20;
  for (size_t n = 2 * kTile; n < 10 * kTile; n += kTile) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .Test(xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x20, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
  }
}
2360 
// Sweeps every batch size below 20 (1 through 19).
TEST(F32_VELU__SSE2_RR2_LUT16_P3_X20, batch_lt_20) {
  TEST_REQUIRES_X86_SSE2;
  constexpr size_t kTile = 20;
  for (size_t n = 1; n < kTile; ++n) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .Test(xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x20, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
  }
}
2369 
// Sweeps every batch size strictly between 20 and 40 (21 through 39).
TEST(F32_VELU__SSE2_RR2_LUT16_P3_X20, batch_gt_20) {
  TEST_REQUIRES_X86_SSE2;
  constexpr size_t kTile = 20;
  for (size_t n = kTile + 1; n < 2 * kTile; ++n) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .Test(xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x20, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
  }
}
2378 
// Runs the kernel with inplace(true) over batch sizes 1, 20, 39, ..., 96.
TEST(F32_VELU__SSE2_RR2_LUT16_P3_X20, inplace) {
  TEST_REQUIRES_X86_SSE2;
  constexpr size_t kTile = 20;
  for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .inplace(true)
      .Test(xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x20, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
  }
}
2388 
// Runs the kernel with prescale = 0.1 and 10.0 over batch sizes
// 1, 20, 39, ..., 96.
TEST(F32_VELU__SSE2_RR2_LUT16_P3_X20, prescale) {
  TEST_REQUIRES_X86_SSE2;
  constexpr size_t kTile = 20;
  const float kPrescaleValues[] = {0.1f, 10.0f};
  for (const float prescale_value : kPrescaleValues) {
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .prescale(prescale_value)
        .Test(xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x20, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
    }
  }
}
2400 
// Runs the kernel with alpha = 0.3 and 3.0 over batch sizes 1, 20, 39, ..., 96.
TEST(F32_VELU__SSE2_RR2_LUT16_P3_X20, alpha) {
  TEST_REQUIRES_X86_SSE2;
  constexpr size_t kTile = 20;
  const float kAlphaValues[] = {0.3f, 3.0f};
  for (const float alpha_value : kAlphaValues) {
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .alpha(alpha_value)
        .Test(xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x20, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
    }
  }
}
2412 
// Runs the kernel with beta = 0.3 and 3.0 over batch sizes 1, 20, 39, ..., 96.
TEST(F32_VELU__SSE2_RR2_LUT16_P3_X20, beta) {
  TEST_REQUIRES_X86_SSE2;
  constexpr size_t kTile = 20;
  const float kBetaValues[] = {0.3f, 3.0f};
  for (const float beta_value : kBetaValues) {
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .beta(beta_value)
        .Test(xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x20, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
    }
  }
}
2424 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
2425 
2426 
2427 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the x24 LUT16/P3 SSE2 ELU kernel once with a batch size of exactly 24.
TEST(F32_VELU__SSE2_RR2_LUT16_P3_X24, batch_eq_24) {
  TEST_REQUIRES_X86_SSE2;
  constexpr size_t kTile = 24;
  VUnaryMicrokernelTester()
    .batch_size(kTile)
    .Test(xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x24, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
}
2434 
// Sweeps batch sizes that are multiples of 24: 48, 72, ..., 216.
TEST(F32_VELU__SSE2_RR2_LUT16_P3_X24, batch_div_24) {
  TEST_REQUIRES_X86_SSE2;
  constexpr size_t kTile = 24;
  for (size_t n = 2 * kTile; n < 10 * kTile; n += kTile) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .Test(xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x24, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
  }
}
2443 
// Sweeps every batch size below 24 (1 through 23).
TEST(F32_VELU__SSE2_RR2_LUT16_P3_X24, batch_lt_24) {
  TEST_REQUIRES_X86_SSE2;
  constexpr size_t kTile = 24;
  for (size_t n = 1; n < kTile; ++n) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .Test(xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x24, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
  }
}
2452 
// Sweeps every batch size strictly between 24 and 48 (25 through 47).
TEST(F32_VELU__SSE2_RR2_LUT16_P3_X24, batch_gt_24) {
  TEST_REQUIRES_X86_SSE2;
  constexpr size_t kTile = 24;
  for (size_t n = kTile + 1; n < 2 * kTile; ++n) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .Test(xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x24, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
  }
}
2461 
// Runs the kernel with inplace(true) over batch sizes 1, 24, 47, ..., 116.
TEST(F32_VELU__SSE2_RR2_LUT16_P3_X24, inplace) {
  TEST_REQUIRES_X86_SSE2;
  constexpr size_t kTile = 24;
  for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .inplace(true)
      .Test(xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x24, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
  }
}
2471 
// Runs the kernel with prescale = 0.1 and 10.0 over batch sizes
// 1, 24, 47, ..., 116.
TEST(F32_VELU__SSE2_RR2_LUT16_P3_X24, prescale) {
  TEST_REQUIRES_X86_SSE2;
  constexpr size_t kTile = 24;
  const float kPrescaleValues[] = {0.1f, 10.0f};
  for (const float prescale_value : kPrescaleValues) {
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .prescale(prescale_value)
        .Test(xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x24, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
    }
  }
}
2483 
// Runs the kernel with alpha = 0.3 and 3.0 over batch sizes 1, 24, 47, ..., 116.
TEST(F32_VELU__SSE2_RR2_LUT16_P3_X24, alpha) {
  TEST_REQUIRES_X86_SSE2;
  constexpr size_t kTile = 24;
  const float kAlphaValues[] = {0.3f, 3.0f};
  for (const float alpha_value : kAlphaValues) {
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .alpha(alpha_value)
        .Test(xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x24, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
    }
  }
}
2495 
// Runs the kernel with beta = 0.3 and 3.0 over batch sizes 1, 24, 47, ..., 116.
TEST(F32_VELU__SSE2_RR2_LUT16_P3_X24, beta) {
  TEST_REQUIRES_X86_SSE2;
  constexpr size_t kTile = 24;
  const float kBetaValues[] = {0.3f, 3.0f};
  for (const float beta_value : kBetaValues) {
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .beta(beta_value)
        .Test(xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x24, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
    }
  }
}
2507 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
2508 
2509 
2510 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the x4 P6 SSE2 ELU kernel once with a batch size of exactly 4.
TEST(F32_VELU__SSE2_RR2_P6_X4, batch_eq_4) {
  TEST_REQUIRES_X86_SSE2;
  constexpr size_t kTile = 4;
  VUnaryMicrokernelTester()
    .batch_size(kTile)
    .Test(xnn_f32_velu_ukernel__sse2_rr2_p6_x4, xnn_init_f32_elu_sse2_rr2_p6_params);
}
2517 
// Sweeps batch sizes that are multiples of 4: 8, 12, ..., 36.
TEST(F32_VELU__SSE2_RR2_P6_X4, batch_div_4) {
  TEST_REQUIRES_X86_SSE2;
  constexpr size_t kTile = 4;
  for (size_t n = 2 * kTile; n < 10 * kTile; n += kTile) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .Test(xnn_f32_velu_ukernel__sse2_rr2_p6_x4, xnn_init_f32_elu_sse2_rr2_p6_params);
  }
}
2526 
// Sweeps every batch size below 4 (1 through 3).
TEST(F32_VELU__SSE2_RR2_P6_X4, batch_lt_4) {
  TEST_REQUIRES_X86_SSE2;
  constexpr size_t kTile = 4;
  for (size_t n = 1; n < kTile; ++n) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .Test(xnn_f32_velu_ukernel__sse2_rr2_p6_x4, xnn_init_f32_elu_sse2_rr2_p6_params);
  }
}
2535 
// Sweeps every batch size strictly between 4 and 8 (5 through 7).
TEST(F32_VELU__SSE2_RR2_P6_X4, batch_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  constexpr size_t kTile = 4;
  for (size_t n = kTile + 1; n < 2 * kTile; ++n) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .Test(xnn_f32_velu_ukernel__sse2_rr2_p6_x4, xnn_init_f32_elu_sse2_rr2_p6_params);
  }
}
2544 
// Runs the kernel with inplace(true) over batch sizes 1, 4, 7, ..., 19.
TEST(F32_VELU__SSE2_RR2_P6_X4, inplace) {
  TEST_REQUIRES_X86_SSE2;
  constexpr size_t kTile = 4;
  for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .inplace(true)
      .Test(xnn_f32_velu_ukernel__sse2_rr2_p6_x4, xnn_init_f32_elu_sse2_rr2_p6_params);
  }
}
2554 
// Runs the kernel with prescale = 0.1 and 10.0 over batch sizes
// 1, 4, 7, ..., 19.
TEST(F32_VELU__SSE2_RR2_P6_X4, prescale) {
  TEST_REQUIRES_X86_SSE2;
  constexpr size_t kTile = 4;
  const float kPrescaleValues[] = {0.1f, 10.0f};
  for (const float prescale_value : kPrescaleValues) {
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .prescale(prescale_value)
        .Test(xnn_f32_velu_ukernel__sse2_rr2_p6_x4, xnn_init_f32_elu_sse2_rr2_p6_params);
    }
  }
}
2566 
// Runs the kernel with alpha = 0.3 and 3.0 over batch sizes 1, 4, 7, ..., 19.
TEST(F32_VELU__SSE2_RR2_P6_X4, alpha) {
  TEST_REQUIRES_X86_SSE2;
  constexpr size_t kTile = 4;
  const float kAlphaValues[] = {0.3f, 3.0f};
  for (const float alpha_value : kAlphaValues) {
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .alpha(alpha_value)
        .Test(xnn_f32_velu_ukernel__sse2_rr2_p6_x4, xnn_init_f32_elu_sse2_rr2_p6_params);
    }
  }
}
2578 
// Runs the kernel with beta = 0.3 and 3.0 over batch sizes 1, 4, 7, ..., 19.
TEST(F32_VELU__SSE2_RR2_P6_X4, beta) {
  TEST_REQUIRES_X86_SSE2;
  constexpr size_t kTile = 4;
  const float kBetaValues[] = {0.3f, 3.0f};
  for (const float beta_value : kBetaValues) {
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .beta(beta_value)
        .Test(xnn_f32_velu_ukernel__sse2_rr2_p6_x4, xnn_init_f32_elu_sse2_rr2_p6_params);
    }
  }
}
2590 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
2591 
2592 
2593 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the x8 P6 SSE2 ELU kernel once with a batch size of exactly 8.
TEST(F32_VELU__SSE2_RR2_P6_X8, batch_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  constexpr size_t kTile = 8;
  VUnaryMicrokernelTester()
    .batch_size(kTile)
    .Test(xnn_f32_velu_ukernel__sse2_rr2_p6_x8, xnn_init_f32_elu_sse2_rr2_p6_params);
}
2600 
// Sweeps batch sizes that are multiples of 8: 16, 24, ..., 72.
TEST(F32_VELU__SSE2_RR2_P6_X8, batch_div_8) {
  TEST_REQUIRES_X86_SSE2;
  constexpr size_t kTile = 8;
  for (size_t n = 2 * kTile; n < 10 * kTile; n += kTile) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .Test(xnn_f32_velu_ukernel__sse2_rr2_p6_x8, xnn_init_f32_elu_sse2_rr2_p6_params);
  }
}
2609 
// Sweeps every batch size below 8 (1 through 7).
TEST(F32_VELU__SSE2_RR2_P6_X8, batch_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  constexpr size_t kTile = 8;
  for (size_t n = 1; n < kTile; ++n) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .Test(xnn_f32_velu_ukernel__sse2_rr2_p6_x8, xnn_init_f32_elu_sse2_rr2_p6_params);
  }
}
2618 
// Sweeps every batch size strictly between 8 and 16 (9 through 15).
TEST(F32_VELU__SSE2_RR2_P6_X8, batch_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  constexpr size_t kTile = 8;
  for (size_t n = kTile + 1; n < 2 * kTile; ++n) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .Test(xnn_f32_velu_ukernel__sse2_rr2_p6_x8, xnn_init_f32_elu_sse2_rr2_p6_params);
  }
}
2627 
// Runs the kernel with inplace(true) over batch sizes 1, 8, 15, ..., 36.
TEST(F32_VELU__SSE2_RR2_P6_X8, inplace) {
  TEST_REQUIRES_X86_SSE2;
  constexpr size_t kTile = 8;
  for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .inplace(true)
      .Test(xnn_f32_velu_ukernel__sse2_rr2_p6_x8, xnn_init_f32_elu_sse2_rr2_p6_params);
  }
}
2637 
// Runs the kernel with prescale = 0.1 and 10.0 over batch sizes
// 1, 8, 15, ..., 36.
TEST(F32_VELU__SSE2_RR2_P6_X8, prescale) {
  TEST_REQUIRES_X86_SSE2;
  constexpr size_t kTile = 8;
  const float kPrescaleValues[] = {0.1f, 10.0f};
  for (const float prescale_value : kPrescaleValues) {
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .prescale(prescale_value)
        .Test(xnn_f32_velu_ukernel__sse2_rr2_p6_x8, xnn_init_f32_elu_sse2_rr2_p6_params);
    }
  }
}
2649 
// Runs the kernel with alpha = 0.3 and 3.0 over batch sizes 1, 8, 15, ..., 36.
TEST(F32_VELU__SSE2_RR2_P6_X8, alpha) {
  TEST_REQUIRES_X86_SSE2;
  constexpr size_t kTile = 8;
  const float kAlphaValues[] = {0.3f, 3.0f};
  for (const float alpha_value : kAlphaValues) {
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .alpha(alpha_value)
        .Test(xnn_f32_velu_ukernel__sse2_rr2_p6_x8, xnn_init_f32_elu_sse2_rr2_p6_params);
    }
  }
}
2661 
// Runs the kernel with beta = 0.3 and 3.0 over batch sizes 1, 8, 15, ..., 36.
TEST(F32_VELU__SSE2_RR2_P6_X8, beta) {
  TEST_REQUIRES_X86_SSE2;
  constexpr size_t kTile = 8;
  const float kBetaValues[] = {0.3f, 3.0f};
  for (const float beta_value : kBetaValues) {
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .beta(beta_value)
        .Test(xnn_f32_velu_ukernel__sse2_rr2_p6_x8, xnn_init_f32_elu_sse2_rr2_p6_params);
    }
  }
}
2673 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
2674 
2675 
2676 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the x12 P6 SSE2 ELU kernel once with a batch size of exactly 12.
TEST(F32_VELU__SSE2_RR2_P6_X12, batch_eq_12) {
  TEST_REQUIRES_X86_SSE2;
  constexpr size_t kTile = 12;
  VUnaryMicrokernelTester()
    .batch_size(kTile)
    .Test(xnn_f32_velu_ukernel__sse2_rr2_p6_x12, xnn_init_f32_elu_sse2_rr2_p6_params);
}
2683 
// Sweeps batch sizes that are multiples of 12: 24, 36, ..., 108.
TEST(F32_VELU__SSE2_RR2_P6_X12, batch_div_12) {
  TEST_REQUIRES_X86_SSE2;
  constexpr size_t kTile = 12;
  for (size_t n = 2 * kTile; n < 10 * kTile; n += kTile) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .Test(xnn_f32_velu_ukernel__sse2_rr2_p6_x12, xnn_init_f32_elu_sse2_rr2_p6_params);
  }
}
2692 
// Sweeps every batch size below 12 (1 through 11).
TEST(F32_VELU__SSE2_RR2_P6_X12, batch_lt_12) {
  TEST_REQUIRES_X86_SSE2;
  constexpr size_t kTile = 12;
  for (size_t n = 1; n < kTile; ++n) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .Test(xnn_f32_velu_ukernel__sse2_rr2_p6_x12, xnn_init_f32_elu_sse2_rr2_p6_params);
  }
}
2701 
// Sweeps every batch size strictly between 12 and 24 (13 through 23).
TEST(F32_VELU__SSE2_RR2_P6_X12, batch_gt_12) {
  TEST_REQUIRES_X86_SSE2;
  constexpr size_t kTile = 12;
  for (size_t n = kTile + 1; n < 2 * kTile; ++n) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .Test(xnn_f32_velu_ukernel__sse2_rr2_p6_x12, xnn_init_f32_elu_sse2_rr2_p6_params);
  }
}
2710 
// Runs the kernel with inplace(true) over batch sizes 1, 12, 23, ..., 56.
TEST(F32_VELU__SSE2_RR2_P6_X12, inplace) {
  TEST_REQUIRES_X86_SSE2;
  constexpr size_t kTile = 12;
  for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .inplace(true)
      .Test(xnn_f32_velu_ukernel__sse2_rr2_p6_x12, xnn_init_f32_elu_sse2_rr2_p6_params);
  }
}
2720 
// Runs the kernel with prescale = 0.1 and 10.0 over batch sizes
// 1, 12, 23, ..., 56.
TEST(F32_VELU__SSE2_RR2_P6_X12, prescale) {
  TEST_REQUIRES_X86_SSE2;
  constexpr size_t kTile = 12;
  const float kPrescaleValues[] = {0.1f, 10.0f};
  for (const float prescale_value : kPrescaleValues) {
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .prescale(prescale_value)
        .Test(xnn_f32_velu_ukernel__sse2_rr2_p6_x12, xnn_init_f32_elu_sse2_rr2_p6_params);
    }
  }
}
2732 
// Runs the kernel with alpha = 0.3 and 3.0 over batch sizes 1, 12, 23, ..., 56.
TEST(F32_VELU__SSE2_RR2_P6_X12, alpha) {
  TEST_REQUIRES_X86_SSE2;
  constexpr size_t kTile = 12;
  const float kAlphaValues[] = {0.3f, 3.0f};
  for (const float alpha_value : kAlphaValues) {
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .alpha(alpha_value)
        .Test(xnn_f32_velu_ukernel__sse2_rr2_p6_x12, xnn_init_f32_elu_sse2_rr2_p6_params);
    }
  }
}
2744 
// Runs the kernel with beta = 0.3 and 3.0 over batch sizes 1, 12, 23, ..., 56.
TEST(F32_VELU__SSE2_RR2_P6_X12, beta) {
  TEST_REQUIRES_X86_SSE2;
  constexpr size_t kTile = 12;
  const float kBetaValues[] = {0.3f, 3.0f};
  for (const float beta_value : kBetaValues) {
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .beta(beta_value)
        .Test(xnn_f32_velu_ukernel__sse2_rr2_p6_x12, xnn_init_f32_elu_sse2_rr2_p6_params);
    }
  }
}
2756 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
2757 
2758 
2759 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the x16 P6 SSE2 ELU kernel once with a batch size of exactly 16.
TEST(F32_VELU__SSE2_RR2_P6_X16, batch_eq_16) {
  TEST_REQUIRES_X86_SSE2;
  constexpr size_t kTile = 16;
  VUnaryMicrokernelTester()
    .batch_size(kTile)
    .Test(xnn_f32_velu_ukernel__sse2_rr2_p6_x16, xnn_init_f32_elu_sse2_rr2_p6_params);
}
2766 
// Sweeps batch sizes that are multiples of 16: 32, 48, ..., 144.
TEST(F32_VELU__SSE2_RR2_P6_X16, batch_div_16) {
  TEST_REQUIRES_X86_SSE2;
  constexpr size_t kTile = 16;
  for (size_t n = 2 * kTile; n < 10 * kTile; n += kTile) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .Test(xnn_f32_velu_ukernel__sse2_rr2_p6_x16, xnn_init_f32_elu_sse2_rr2_p6_params);
  }
}
2775 
// Sweeps every batch size below 16 (1 through 15).
TEST(F32_VELU__SSE2_RR2_P6_X16, batch_lt_16) {
  TEST_REQUIRES_X86_SSE2;
  constexpr size_t kTile = 16;
  for (size_t n = 1; n < kTile; ++n) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .Test(xnn_f32_velu_ukernel__sse2_rr2_p6_x16, xnn_init_f32_elu_sse2_rr2_p6_params);
  }
}
2784 
// Sweeps every batch size strictly between 16 and 32 (17 through 31).
TEST(F32_VELU__SSE2_RR2_P6_X16, batch_gt_16) {
  TEST_REQUIRES_X86_SSE2;
  constexpr size_t kTile = 16;
  for (size_t n = kTile + 1; n < 2 * kTile; ++n) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .Test(xnn_f32_velu_ukernel__sse2_rr2_p6_x16, xnn_init_f32_elu_sse2_rr2_p6_params);
  }
}
2793 
// Runs the kernel with inplace(true) over batch sizes 1, 16, 31, ..., 76.
TEST(F32_VELU__SSE2_RR2_P6_X16, inplace) {
  TEST_REQUIRES_X86_SSE2;
  constexpr size_t kTile = 16;
  for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .inplace(true)
      .Test(xnn_f32_velu_ukernel__sse2_rr2_p6_x16, xnn_init_f32_elu_sse2_rr2_p6_params);
  }
}
2803 
// Runs the kernel with prescale = 0.1 and 10.0 over batch sizes
// 1, 16, 31, ..., 76.
TEST(F32_VELU__SSE2_RR2_P6_X16, prescale) {
  TEST_REQUIRES_X86_SSE2;
  constexpr size_t kTile = 16;
  const float kPrescaleValues[] = {0.1f, 10.0f};
  for (const float prescale_value : kPrescaleValues) {
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .prescale(prescale_value)
        .Test(xnn_f32_velu_ukernel__sse2_rr2_p6_x16, xnn_init_f32_elu_sse2_rr2_p6_params);
    }
  }
}
2815 
// Runs the kernel with alpha = 0.3 and 3.0 over batch sizes 1, 16, 31, ..., 76.
TEST(F32_VELU__SSE2_RR2_P6_X16, alpha) {
  TEST_REQUIRES_X86_SSE2;
  constexpr size_t kTile = 16;
  const float kAlphaValues[] = {0.3f, 3.0f};
  for (const float alpha_value : kAlphaValues) {
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .alpha(alpha_value)
        .Test(xnn_f32_velu_ukernel__sse2_rr2_p6_x16, xnn_init_f32_elu_sse2_rr2_p6_params);
    }
  }
}
2827 
// Runs the kernel with beta = 0.3 and 3.0 over batch sizes 1, 16, 31, ..., 76.
TEST(F32_VELU__SSE2_RR2_P6_X16, beta) {
  TEST_REQUIRES_X86_SSE2;
  constexpr size_t kTile = 16;
  const float kBetaValues[] = {0.3f, 3.0f};
  for (const float beta_value : kBetaValues) {
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .beta(beta_value)
        .Test(xnn_f32_velu_ukernel__sse2_rr2_p6_x16, xnn_init_f32_elu_sse2_rr2_p6_params);
    }
  }
}
2839 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
2840 
2841 
2842 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the x20 P6 SSE2 ELU kernel once with a batch size of exactly 20.
TEST(F32_VELU__SSE2_RR2_P6_X20, batch_eq_20) {
  TEST_REQUIRES_X86_SSE2;
  constexpr size_t kTile = 20;
  VUnaryMicrokernelTester()
    .batch_size(kTile)
    .Test(xnn_f32_velu_ukernel__sse2_rr2_p6_x20, xnn_init_f32_elu_sse2_rr2_p6_params);
}
2849 
// Sweeps batch sizes that are multiples of 20: 40, 60, ..., 180.
TEST(F32_VELU__SSE2_RR2_P6_X20, batch_div_20) {
  TEST_REQUIRES_X86_SSE2;
  constexpr size_t kTile = 20;
  for (size_t n = 2 * kTile; n < 10 * kTile; n += kTile) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .Test(xnn_f32_velu_ukernel__sse2_rr2_p6_x20, xnn_init_f32_elu_sse2_rr2_p6_params);
  }
}
2858 
// Sweeps every batch size below 20 (1 through 19).
TEST(F32_VELU__SSE2_RR2_P6_X20, batch_lt_20) {
  TEST_REQUIRES_X86_SSE2;
  constexpr size_t kTile = 20;
  for (size_t n = 1; n < kTile; ++n) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .Test(xnn_f32_velu_ukernel__sse2_rr2_p6_x20, xnn_init_f32_elu_sse2_rr2_p6_params);
  }
}
2867 
// Sweeps every batch size strictly between 20 and 40 (21 through 39).
TEST(F32_VELU__SSE2_RR2_P6_X20, batch_gt_20) {
  TEST_REQUIRES_X86_SSE2;
  constexpr size_t kTile = 20;
  for (size_t n = kTile + 1; n < 2 * kTile; ++n) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .Test(xnn_f32_velu_ukernel__sse2_rr2_p6_x20, xnn_init_f32_elu_sse2_rr2_p6_params);
  }
}
2876 
// Runs the kernel with inplace(true) over batch sizes 1, 20, 39, ..., 96.
TEST(F32_VELU__SSE2_RR2_P6_X20, inplace) {
  TEST_REQUIRES_X86_SSE2;
  constexpr size_t kTile = 20;
  for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .inplace(true)
      .Test(xnn_f32_velu_ukernel__sse2_rr2_p6_x20, xnn_init_f32_elu_sse2_rr2_p6_params);
  }
}
2886 
// Runs the kernel with prescale = 0.1 and 10.0 over batch sizes
// 1, 20, 39, ..., 96.
TEST(F32_VELU__SSE2_RR2_P6_X20, prescale) {
  TEST_REQUIRES_X86_SSE2;
  constexpr size_t kTile = 20;
  const float kPrescaleValues[] = {0.1f, 10.0f};
  for (const float prescale_value : kPrescaleValues) {
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .prescale(prescale_value)
        .Test(xnn_f32_velu_ukernel__sse2_rr2_p6_x20, xnn_init_f32_elu_sse2_rr2_p6_params);
    }
  }
}
2898 
// Runs the kernel with alpha = 0.3 and 3.0 over batch sizes 1, 20, 39, ..., 96.
TEST(F32_VELU__SSE2_RR2_P6_X20, alpha) {
  TEST_REQUIRES_X86_SSE2;
  constexpr size_t kTile = 20;
  const float kAlphaValues[] = {0.3f, 3.0f};
  for (const float alpha_value : kAlphaValues) {
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .alpha(alpha_value)
        .Test(xnn_f32_velu_ukernel__sse2_rr2_p6_x20, xnn_init_f32_elu_sse2_rr2_p6_params);
    }
  }
}
2910 
// Runs the kernel with beta = 0.3 and 3.0 over batch sizes 1, 20, 39, ..., 96.
TEST(F32_VELU__SSE2_RR2_P6_X20, beta) {
  TEST_REQUIRES_X86_SSE2;
  constexpr size_t kTile = 20;
  const float kBetaValues[] = {0.3f, 3.0f};
  for (const float beta_value : kBetaValues) {
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .beta(beta_value)
        .Test(xnn_f32_velu_ukernel__sse2_rr2_p6_x20, xnn_init_f32_elu_sse2_rr2_p6_params);
    }
  }
}
2922 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
2923 
2924 
2925 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the x24 P6 SSE2 ELU kernel once with a batch size of exactly 24.
TEST(F32_VELU__SSE2_RR2_P6_X24, batch_eq_24) {
  TEST_REQUIRES_X86_SSE2;
  constexpr size_t kTile = 24;
  VUnaryMicrokernelTester()
    .batch_size(kTile)
    .Test(xnn_f32_velu_ukernel__sse2_rr2_p6_x24, xnn_init_f32_elu_sse2_rr2_p6_params);
}
2932 
// Sweeps batch sizes that are multiples of 24: 48, 72, ..., 216.
TEST(F32_VELU__SSE2_RR2_P6_X24, batch_div_24) {
  TEST_REQUIRES_X86_SSE2;
  constexpr size_t kTile = 24;
  for (size_t n = 2 * kTile; n < 10 * kTile; n += kTile) {
    VUnaryMicrokernelTester()
      .batch_size(n)
      .Test(xnn_f32_velu_ukernel__sse2_rr2_p6_x24, xnn_init_f32_elu_sse2_rr2_p6_params);
  }
}
2941 
// Sweeps every batch_size from 1 through 23.
TEST(F32_VELU__SSE2_RR2_P6_X24, batch_lt_24) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t batch_size = 1; batch_size < 24; batch_size++) {
    VUnaryMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_f32_velu_ukernel__sse2_rr2_p6_x24, xnn_init_f32_elu_sse2_rr2_p6_params);
  }
}
2950 
// Sweeps every batch_size from 25 through 47.
TEST(F32_VELU__SSE2_RR2_P6_X24, batch_gt_24) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t batch_size = 25; batch_size < 48; batch_size++) {
    VUnaryMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_f32_velu_ukernel__sse2_rr2_p6_x24, xnn_init_f32_elu_sse2_rr2_p6_params);
  }
}
2959 
// Sweeps batch_size from 1 to at most 120 in steps of 23 with inplace(true) set on the tester.
TEST(F32_VELU__SSE2_RR2_P6_X24, inplace) {
  TEST_REQUIRES_X86_SSE2;
  for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
    VUnaryMicrokernelTester()
      .batch_size(batch_size)
      .inplace(true)
      .Test(xnn_f32_velu_ukernel__sse2_rr2_p6_x24, xnn_init_f32_elu_sse2_rr2_p6_params);
  }
}
2969 
// For each prescale in {0.1f, 10.0f}, sweeps batch_size from 1 to at most 120 in steps of 23.
TEST(F32_VELU__SSE2_RR2_P6_X24, prescale) {
  TEST_REQUIRES_X86_SSE2;
  for (float prescale : std::vector<float>({0.1f, 10.0f})) {
    for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .prescale(prescale)
        .Test(xnn_f32_velu_ukernel__sse2_rr2_p6_x24, xnn_init_f32_elu_sse2_rr2_p6_params);
    }
  }
}
2981 
// For each alpha in {0.3f, 3.0f}, sweeps batch_size from 1 to at most 120 in steps of 23.
TEST(F32_VELU__SSE2_RR2_P6_X24, alpha) {
  TEST_REQUIRES_X86_SSE2;
  for (float alpha : std::vector<float>({0.3f, 3.0f})) {
    for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .alpha(alpha)
        .Test(xnn_f32_velu_ukernel__sse2_rr2_p6_x24, xnn_init_f32_elu_sse2_rr2_p6_params);
    }
  }
}
2993 
// For each beta in {0.3f, 3.0f}, sweeps batch_size from 1 to at most 120 in steps of 23.
TEST(F32_VELU__SSE2_RR2_P6_X24, beta) {
  TEST_REQUIRES_X86_SSE2;
  for (float beta : std::vector<float>({0.3f, 3.0f})) {
    for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .beta(beta)
        .Test(xnn_f32_velu_ukernel__sse2_rr2_p6_x24, xnn_init_f32_elu_sse2_rr2_p6_params);
    }
  }
}
3005 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
3006 
3007 
3008 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run at batch_size == 4, the value in the kernel's x4 suffix.
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X4, batch_eq_4) {
  TEST_REQUIRES_X86_SSE41;
  VUnaryMicrokernelTester()
    .batch_size(4)
    .Test(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x4, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
}
3015 
// Sweeps batch_size over the multiples of 4 from 8 up to (but excluding) 40.
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X4, batch_div_4) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t batch_size = 8; batch_size < 40; batch_size += 4) {
    VUnaryMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x4, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
  }
}
3024 
// Sweeps every batch_size from 1 through 3.
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X4, batch_lt_4) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t batch_size = 1; batch_size < 4; batch_size++) {
    VUnaryMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x4, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
  }
}
3033 
// Sweeps every batch_size from 5 through 7.
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X4, batch_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t batch_size = 5; batch_size < 8; batch_size++) {
    VUnaryMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x4, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
  }
}
3042 
// Sweeps batch_size from 1 to at most 20 in steps of 3 with inplace(true) set on the tester.
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X4, inplace) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
    VUnaryMicrokernelTester()
      .batch_size(batch_size)
      .inplace(true)
      .Test(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x4, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
  }
}
3052 
// For each prescale in {0.1f, 10.0f}, sweeps batch_size from 1 to at most 20 in steps of 3.
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X4, prescale) {
  TEST_REQUIRES_X86_SSE41;
  for (float prescale : std::vector<float>({0.1f, 10.0f})) {
    for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .prescale(prescale)
        .Test(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x4, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
    }
  }
}
3064 
// For each alpha in {0.3f, 3.0f}, sweeps batch_size from 1 to at most 20 in steps of 3.
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X4, alpha) {
  TEST_REQUIRES_X86_SSE41;
  for (float alpha : std::vector<float>({0.3f, 3.0f})) {
    for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .alpha(alpha)
        .Test(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x4, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
    }
  }
}
3076 
// For each beta in {0.3f, 3.0f}, sweeps batch_size from 1 to at most 20 in steps of 3.
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X4, beta) {
  TEST_REQUIRES_X86_SSE41;
  for (float beta : std::vector<float>({0.3f, 3.0f})) {
    for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .beta(beta)
        .Test(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x4, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
    }
  }
}
3088 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
3089 
3090 
3091 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run at batch_size == 8, the value in the kernel's x8 suffix.
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X8, batch_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  VUnaryMicrokernelTester()
    .batch_size(8)
    .Test(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x8, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
}
3098 
// Sweeps batch_size over the multiples of 8 from 16 up to (but excluding) 80.
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X8, batch_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t batch_size = 16; batch_size < 80; batch_size += 8) {
    VUnaryMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x8, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
  }
}
3107 
// Sweeps every batch_size from 1 through 7.
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X8, batch_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t batch_size = 1; batch_size < 8; batch_size++) {
    VUnaryMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x8, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
  }
}
3116 
// Sweeps every batch_size from 9 through 15.
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X8, batch_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t batch_size = 9; batch_size < 16; batch_size++) {
    VUnaryMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x8, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
  }
}
3125 
// Sweeps batch_size from 1 to at most 40 in steps of 7 with inplace(true) set on the tester.
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X8, inplace) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
    VUnaryMicrokernelTester()
      .batch_size(batch_size)
      .inplace(true)
      .Test(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x8, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
  }
}
3135 
// For each prescale in {0.1f, 10.0f}, sweeps batch_size from 1 to at most 40 in steps of 7.
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X8, prescale) {
  TEST_REQUIRES_X86_SSE41;
  for (float prescale : std::vector<float>({0.1f, 10.0f})) {
    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .prescale(prescale)
        .Test(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x8, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
    }
  }
}
3147 
// For each alpha in {0.3f, 3.0f}, sweeps batch_size from 1 to at most 40 in steps of 7.
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X8, alpha) {
  TEST_REQUIRES_X86_SSE41;
  for (float alpha : std::vector<float>({0.3f, 3.0f})) {
    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .alpha(alpha)
        .Test(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x8, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
    }
  }
}
3159 
// For each beta in {0.3f, 3.0f}, sweeps batch_size from 1 to at most 40 in steps of 7.
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X8, beta) {
  TEST_REQUIRES_X86_SSE41;
  for (float beta : std::vector<float>({0.3f, 3.0f})) {
    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .beta(beta)
        .Test(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x8, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
    }
  }
}
3171 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
3172 
3173 
3174 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run at batch_size == 12, the value in the kernel's x12 suffix.
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X12, batch_eq_12) {
  TEST_REQUIRES_X86_SSE41;
  VUnaryMicrokernelTester()
    .batch_size(12)
    .Test(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x12, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
}
3181 
// Sweeps batch_size over the multiples of 12 from 24 up to (but excluding) 120.
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X12, batch_div_12) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t batch_size = 24; batch_size < 120; batch_size += 12) {
    VUnaryMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x12, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
  }
}
3190 
// Sweeps every batch_size from 1 through 11.
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X12, batch_lt_12) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t batch_size = 1; batch_size < 12; batch_size++) {
    VUnaryMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x12, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
  }
}
3199 
// Sweeps every batch_size from 13 through 23.
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X12, batch_gt_12) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t batch_size = 13; batch_size < 24; batch_size++) {
    VUnaryMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x12, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
  }
}
3208 
// Sweeps batch_size from 1 to at most 60 in steps of 11 with inplace(true) set on the tester.
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X12, inplace) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t batch_size = 1; batch_size <= 60; batch_size += 11) {
    VUnaryMicrokernelTester()
      .batch_size(batch_size)
      .inplace(true)
      .Test(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x12, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
  }
}
3218 
// For each prescale in {0.1f, 10.0f}, sweeps batch_size from 1 to at most 60 in steps of 11.
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X12, prescale) {
  TEST_REQUIRES_X86_SSE41;
  for (float prescale : std::vector<float>({0.1f, 10.0f})) {
    for (size_t batch_size = 1; batch_size <= 60; batch_size += 11) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .prescale(prescale)
        .Test(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x12, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
    }
  }
}
3230 
// For each alpha in {0.3f, 3.0f}, sweeps batch_size from 1 to at most 60 in steps of 11.
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X12, alpha) {
  TEST_REQUIRES_X86_SSE41;
  for (float alpha : std::vector<float>({0.3f, 3.0f})) {
    for (size_t batch_size = 1; batch_size <= 60; batch_size += 11) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .alpha(alpha)
        .Test(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x12, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
    }
  }
}
3242 
// For each beta in {0.3f, 3.0f}, sweeps batch_size from 1 to at most 60 in steps of 11.
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X12, beta) {
  TEST_REQUIRES_X86_SSE41;
  for (float beta : std::vector<float>({0.3f, 3.0f})) {
    for (size_t batch_size = 1; batch_size <= 60; batch_size += 11) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .beta(beta)
        .Test(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x12, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
    }
  }
}
3254 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
3255 
3256 
3257 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run at batch_size == 16, the value in the kernel's x16 suffix.
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X16, batch_eq_16) {
  TEST_REQUIRES_X86_SSE41;
  VUnaryMicrokernelTester()
    .batch_size(16)
    .Test(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x16, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
}
3264 
// Sweeps batch_size over the multiples of 16 from 32 up to (but excluding) 160.
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X16, batch_div_16) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t batch_size = 32; batch_size < 160; batch_size += 16) {
    VUnaryMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x16, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
  }
}
3273 
// Sweeps every batch_size from 1 through 15.
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X16, batch_lt_16) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t batch_size = 1; batch_size < 16; batch_size++) {
    VUnaryMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x16, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
  }
}
3282 
// Sweeps every batch_size from 17 through 31.
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X16, batch_gt_16) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t batch_size = 17; batch_size < 32; batch_size++) {
    VUnaryMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x16, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
  }
}
3291 
// Sweeps batch_size from 1 to at most 80 in steps of 15 with inplace(true) set on the tester.
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X16, inplace) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
    VUnaryMicrokernelTester()
      .batch_size(batch_size)
      .inplace(true)
      .Test(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x16, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
  }
}
3301 
// For each prescale in {0.1f, 10.0f}, sweeps batch_size from 1 to at most 80 in steps of 15.
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X16, prescale) {
  TEST_REQUIRES_X86_SSE41;
  for (float prescale : std::vector<float>({0.1f, 10.0f})) {
    for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .prescale(prescale)
        .Test(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x16, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
    }
  }
}
3313 
// For each alpha in {0.3f, 3.0f}, sweeps batch_size from 1 to at most 80 in steps of 15.
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X16, alpha) {
  TEST_REQUIRES_X86_SSE41;
  for (float alpha : std::vector<float>({0.3f, 3.0f})) {
    for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .alpha(alpha)
        .Test(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x16, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
    }
  }
}
3325 
// For each beta in {0.3f, 3.0f}, sweeps batch_size from 1 to at most 80 in steps of 15.
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X16, beta) {
  TEST_REQUIRES_X86_SSE41;
  for (float beta : std::vector<float>({0.3f, 3.0f})) {
    for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .beta(beta)
        .Test(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x16, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
    }
  }
}
3337 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
3338 
3339 
3340 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run at batch_size == 20, the value in the kernel's x20 suffix.
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X20, batch_eq_20) {
  TEST_REQUIRES_X86_SSE41;
  VUnaryMicrokernelTester()
    .batch_size(20)
    .Test(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x20, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
}
3347 
// Sweeps batch_size over the multiples of 20 from 40 up to (but excluding) 200.
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X20, batch_div_20) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t batch_size = 40; batch_size < 200; batch_size += 20) {
    VUnaryMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x20, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
  }
}
3356 
// Sweeps every batch_size from 1 through 19.
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X20, batch_lt_20) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t batch_size = 1; batch_size < 20; batch_size++) {
    VUnaryMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x20, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
  }
}
3365 
// Sweeps every batch_size from 21 through 39.
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X20, batch_gt_20) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t batch_size = 21; batch_size < 40; batch_size++) {
    VUnaryMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x20, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
  }
}
3374 
// Sweeps batch_size from 1 to at most 100 in steps of 19 with inplace(true) set on the tester.
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X20, inplace) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t batch_size = 1; batch_size <= 100; batch_size += 19) {
    VUnaryMicrokernelTester()
      .batch_size(batch_size)
      .inplace(true)
      .Test(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x20, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
  }
}
3384 
// For each prescale in {0.1f, 10.0f}, sweeps batch_size from 1 to at most 100 in steps of 19.
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X20, prescale) {
  TEST_REQUIRES_X86_SSE41;
  for (float prescale : std::vector<float>({0.1f, 10.0f})) {
    for (size_t batch_size = 1; batch_size <= 100; batch_size += 19) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .prescale(prescale)
        .Test(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x20, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
    }
  }
}
3396 
// For each alpha in {0.3f, 3.0f}, sweeps batch_size from 1 to at most 100 in steps of 19.
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X20, alpha) {
  TEST_REQUIRES_X86_SSE41;
  for (float alpha : std::vector<float>({0.3f, 3.0f})) {
    for (size_t batch_size = 1; batch_size <= 100; batch_size += 19) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .alpha(alpha)
        .Test(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x20, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
    }
  }
}
3408 
// For each beta in {0.3f, 3.0f}, sweeps batch_size from 1 to at most 100 in steps of 19.
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X20, beta) {
  TEST_REQUIRES_X86_SSE41;
  for (float beta : std::vector<float>({0.3f, 3.0f})) {
    for (size_t batch_size = 1; batch_size <= 100; batch_size += 19) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .beta(beta)
        .Test(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x20, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
    }
  }
}
3420 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
3421 
3422 
3423 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run at batch_size == 24, the value in the kernel's x24 suffix.
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X24, batch_eq_24) {
  TEST_REQUIRES_X86_SSE41;
  VUnaryMicrokernelTester()
    .batch_size(24)
    .Test(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x24, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
}
3430 
// Sweeps batch_size over the multiples of 24 from 48 up to (but excluding) 240.
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X24, batch_div_24) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t batch_size = 48; batch_size < 240; batch_size += 24) {
    VUnaryMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x24, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
  }
}
3439 
// Sweeps every batch_size from 1 through 23.
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X24, batch_lt_24) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t batch_size = 1; batch_size < 24; batch_size++) {
    VUnaryMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x24, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
  }
}
3448 
// Sweeps every batch_size from 25 through 47.
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X24, batch_gt_24) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t batch_size = 25; batch_size < 48; batch_size++) {
    VUnaryMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x24, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
  }
}
3457 
// Sweeps batch_size from 1 to at most 120 in steps of 23 with inplace(true) set on the tester.
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X24, inplace) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
    VUnaryMicrokernelTester()
      .batch_size(batch_size)
      .inplace(true)
      .Test(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x24, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
  }
}
3467 
// For each prescale in {0.1f, 10.0f}, sweeps batch_size from 1 to at most 120 in steps of 23.
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X24, prescale) {
  TEST_REQUIRES_X86_SSE41;
  for (float prescale : std::vector<float>({0.1f, 10.0f})) {
    for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .prescale(prescale)
        .Test(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x24, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
    }
  }
}
3479 
// For each alpha in {0.3f, 3.0f}, sweeps batch_size from 1 to at most 120 in steps of 23.
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X24, alpha) {
  TEST_REQUIRES_X86_SSE41;
  for (float alpha : std::vector<float>({0.3f, 3.0f})) {
    for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .alpha(alpha)
        .Test(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x24, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
    }
  }
}
3491 
// For each beta in {0.3f, 3.0f}, sweeps batch_size from 1 to at most 120 in steps of 23.
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X24, beta) {
  TEST_REQUIRES_X86_SSE41;
  for (float beta : std::vector<float>({0.3f, 3.0f})) {
    for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .beta(beta)
        .Test(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x24, xnn_init_f32_elu_sse2_rr2_lut16_p3_params);
    }
  }
}
3503 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
3504 
3505 
3506 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run at batch_size == 4, the value in the kernel's x4 suffix.
TEST(F32_VELU__SSE41_RR2_P6_X4, batch_eq_4) {
  TEST_REQUIRES_X86_SSE41;
  VUnaryMicrokernelTester()
    .batch_size(4)
    .Test(xnn_f32_velu_ukernel__sse41_rr2_p6_x4, xnn_init_f32_elu_sse2_rr2_p6_params);
}
3513 
// Sweeps batch_size over the multiples of 4 from 8 up to (but excluding) 40.
TEST(F32_VELU__SSE41_RR2_P6_X4, batch_div_4) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t batch_size = 8; batch_size < 40; batch_size += 4) {
    VUnaryMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_f32_velu_ukernel__sse41_rr2_p6_x4, xnn_init_f32_elu_sse2_rr2_p6_params);
  }
}
3522 
// Sweeps every batch_size from 1 through 3.
TEST(F32_VELU__SSE41_RR2_P6_X4, batch_lt_4) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t batch_size = 1; batch_size < 4; batch_size++) {
    VUnaryMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_f32_velu_ukernel__sse41_rr2_p6_x4, xnn_init_f32_elu_sse2_rr2_p6_params);
  }
}
3531 
// Sweeps every batch_size from 5 through 7.
TEST(F32_VELU__SSE41_RR2_P6_X4, batch_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t batch_size = 5; batch_size < 8; batch_size++) {
    VUnaryMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_f32_velu_ukernel__sse41_rr2_p6_x4, xnn_init_f32_elu_sse2_rr2_p6_params);
  }
}
3540 
// Sweeps batch_size from 1 to at most 20 in steps of 3 with inplace(true) set on the tester.
TEST(F32_VELU__SSE41_RR2_P6_X4, inplace) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
    VUnaryMicrokernelTester()
      .batch_size(batch_size)
      .inplace(true)
      .Test(xnn_f32_velu_ukernel__sse41_rr2_p6_x4, xnn_init_f32_elu_sse2_rr2_p6_params);
  }
}
3550 
// For each prescale in {0.1f, 10.0f}, sweeps batch_size from 1 to at most 20 in steps of 3.
TEST(F32_VELU__SSE41_RR2_P6_X4, prescale) {
  TEST_REQUIRES_X86_SSE41;
  for (float prescale : std::vector<float>({0.1f, 10.0f})) {
    for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .prescale(prescale)
        .Test(xnn_f32_velu_ukernel__sse41_rr2_p6_x4, xnn_init_f32_elu_sse2_rr2_p6_params);
    }
  }
}
3562 
// For each alpha in {0.3f, 3.0f}, sweeps batch_size from 1 to at most 20 in steps of 3.
TEST(F32_VELU__SSE41_RR2_P6_X4, alpha) {
  TEST_REQUIRES_X86_SSE41;
  for (float alpha : std::vector<float>({0.3f, 3.0f})) {
    for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .alpha(alpha)
        .Test(xnn_f32_velu_ukernel__sse41_rr2_p6_x4, xnn_init_f32_elu_sse2_rr2_p6_params);
    }
  }
}
3574 
// For each beta in {0.3f, 3.0f}, sweeps batch_size from 1 to at most 20 in steps of 3.
TEST(F32_VELU__SSE41_RR2_P6_X4, beta) {
  TEST_REQUIRES_X86_SSE41;
  for (float beta : std::vector<float>({0.3f, 3.0f})) {
    for (size_t batch_size = 1; batch_size <= 20; batch_size += 3) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .beta(beta)
        .Test(xnn_f32_velu_ukernel__sse41_rr2_p6_x4, xnn_init_f32_elu_sse2_rr2_p6_params);
    }
  }
}
3586 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
3587 
3588 
3589 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run at batch_size == 8, the value in the kernel's x8 suffix.
TEST(F32_VELU__SSE41_RR2_P6_X8, batch_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  VUnaryMicrokernelTester()
    .batch_size(8)
    .Test(xnn_f32_velu_ukernel__sse41_rr2_p6_x8, xnn_init_f32_elu_sse2_rr2_p6_params);
}
3596 
// Sweeps batch_size over the multiples of 8 from 16 up to (but excluding) 80.
TEST(F32_VELU__SSE41_RR2_P6_X8, batch_div_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t batch_size = 16; batch_size < 80; batch_size += 8) {
    VUnaryMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_f32_velu_ukernel__sse41_rr2_p6_x8, xnn_init_f32_elu_sse2_rr2_p6_params);
  }
}
3605 
// Sweeps every batch_size from 1 through 7.
TEST(F32_VELU__SSE41_RR2_P6_X8, batch_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t batch_size = 1; batch_size < 8; batch_size++) {
    VUnaryMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_f32_velu_ukernel__sse41_rr2_p6_x8, xnn_init_f32_elu_sse2_rr2_p6_params);
  }
}
3614 
// Sweeps every batch_size from 9 through 15.
TEST(F32_VELU__SSE41_RR2_P6_X8, batch_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t batch_size = 9; batch_size < 16; batch_size++) {
    VUnaryMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_f32_velu_ukernel__sse41_rr2_p6_x8, xnn_init_f32_elu_sse2_rr2_p6_params);
  }
}
3623 
// Sweeps batch_size from 1 to at most 40 in steps of 7 with inplace(true) set on the tester.
TEST(F32_VELU__SSE41_RR2_P6_X8, inplace) {
  TEST_REQUIRES_X86_SSE41;
  for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
    VUnaryMicrokernelTester()
      .batch_size(batch_size)
      .inplace(true)
      .Test(xnn_f32_velu_ukernel__sse41_rr2_p6_x8, xnn_init_f32_elu_sse2_rr2_p6_params);
  }
}
3633 
// For each prescale in {0.1f, 10.0f}, sweeps batch_size from 1 to at most 40 in steps of 7.
TEST(F32_VELU__SSE41_RR2_P6_X8, prescale) {
  TEST_REQUIRES_X86_SSE41;
  for (float prescale : std::vector<float>({0.1f, 10.0f})) {
    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .prescale(prescale)
        .Test(xnn_f32_velu_ukernel__sse41_rr2_p6_x8, xnn_init_f32_elu_sse2_rr2_p6_params);
    }
  }
}
3645 
// For each alpha in {0.3f, 3.0f}, sweeps batch_size from 1 to at most 40 in steps of 7.
TEST(F32_VELU__SSE41_RR2_P6_X8, alpha) {
  TEST_REQUIRES_X86_SSE41;
  for (float alpha : std::vector<float>({0.3f, 3.0f})) {
    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .alpha(alpha)
        .Test(xnn_f32_velu_ukernel__sse41_rr2_p6_x8, xnn_init_f32_elu_sse2_rr2_p6_params);
    }
  }
}
3657 
// Runs the sse41_rr2_p6_x8 ELU kernel with beta set to 0.3 and then 3,
// each over batch sizes 1 to 40 in steps of 7.
TEST(F32_VELU__SSE41_RR2_P6_X8, beta) {
  TEST_REQUIRES_X86_SSE41;
  for (const float beta : {0.3f, 3.0f}) {
    for (size_t n = 1; n <= 40; n += 7) {
      VUnaryMicrokernelTester().batch_size(n).beta(beta).Test(
        xnn_f32_velu_ukernel__sse41_rr2_p6_x8, xnn_init_f32_elu_sse2_rr2_p6_params);
    }
  }
}
3669 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
3670 
3671 
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // Tests for xnn_f32_velu_ukernel__sse41_rr2_p6_x12. Every case drives the
  // kernel through VUnaryMicrokernelTester with the sse2_rr2_p6 params
  // initializer, after invoking TEST_REQUIRES_X86_SSE41 (presumably an ISA
  // availability check; its definition is not visible here).

  // Batch size of exactly 12.
  TEST(F32_VELU__SSE41_RR2_P6_X12, batch_eq_12) {
    TEST_REQUIRES_X86_SSE41;
    VUnaryMicrokernelTester().batch_size(12).Test(
      xnn_f32_velu_ukernel__sse41_rr2_p6_x12, xnn_init_f32_elu_sse2_rr2_p6_params);
  }

  // Multiples of 12 from 24 through 108.
  TEST(F32_VELU__SSE41_RR2_P6_X12, batch_div_12) {
    TEST_REQUIRES_X86_SSE41;
    for (size_t multiple = 2; multiple < 10; ++multiple) {
      VUnaryMicrokernelTester().batch_size(multiple * 12).Test(
        xnn_f32_velu_ukernel__sse41_rr2_p6_x12, xnn_init_f32_elu_sse2_rr2_p6_params);
    }
  }

  // Every batch size from 1 through 11.
  TEST(F32_VELU__SSE41_RR2_P6_X12, batch_lt_12) {
    TEST_REQUIRES_X86_SSE41;
    for (size_t n = 1; n <= 11; ++n) {
      VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__sse41_rr2_p6_x12, xnn_init_f32_elu_sse2_rr2_p6_params);
    }
  }

  // Every batch size from 13 through 23.
  TEST(F32_VELU__SSE41_RR2_P6_X12, batch_gt_12) {
    TEST_REQUIRES_X86_SSE41;
    for (size_t n = 13; n <= 23; ++n) {
      VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__sse41_rr2_p6_x12, xnn_init_f32_elu_sse2_rr2_p6_params);
    }
  }

  // Tester's inplace option enabled; batch sizes 1 to 60 in steps of 11.
  TEST(F32_VELU__SSE41_RR2_P6_X12, inplace) {
    TEST_REQUIRES_X86_SSE41;
    for (size_t n = 1; n <= 60; n += 11) {
      VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_velu_ukernel__sse41_rr2_p6_x12, xnn_init_f32_elu_sse2_rr2_p6_params);
    }
  }

  // prescale of 0.1 and then 10, each over batch sizes 1 to 60 in steps of 11.
  TEST(F32_VELU__SSE41_RR2_P6_X12, prescale) {
    TEST_REQUIRES_X86_SSE41;
    for (const float prescale : {0.1f, 10.0f}) {
      for (size_t n = 1; n <= 60; n += 11) {
        VUnaryMicrokernelTester().batch_size(n).prescale(prescale).Test(
          xnn_f32_velu_ukernel__sse41_rr2_p6_x12, xnn_init_f32_elu_sse2_rr2_p6_params);
      }
    }
  }

  // alpha of 0.3 and then 3, each over batch sizes 1 to 60 in steps of 11.
  TEST(F32_VELU__SSE41_RR2_P6_X12, alpha) {
    TEST_REQUIRES_X86_SSE41;
    for (const float alpha : {0.3f, 3.0f}) {
      for (size_t n = 1; n <= 60; n += 11) {
        VUnaryMicrokernelTester().batch_size(n).alpha(alpha).Test(
          xnn_f32_velu_ukernel__sse41_rr2_p6_x12, xnn_init_f32_elu_sse2_rr2_p6_params);
      }
    }
  }

  // beta of 0.3 and then 3, each over batch sizes 1 to 60 in steps of 11.
  TEST(F32_VELU__SSE41_RR2_P6_X12, beta) {
    TEST_REQUIRES_X86_SSE41;
    for (const float beta : {0.3f, 3.0f}) {
      for (size_t n = 1; n <= 60; n += 11) {
        VUnaryMicrokernelTester().batch_size(n).beta(beta).Test(
          xnn_f32_velu_ukernel__sse41_rr2_p6_x12, xnn_init_f32_elu_sse2_rr2_p6_params);
      }
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
3753 
3754 
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // Tests for xnn_f32_velu_ukernel__sse41_rr2_p6_x16. Every case drives the
  // kernel through VUnaryMicrokernelTester with the sse2_rr2_p6 params
  // initializer, after invoking TEST_REQUIRES_X86_SSE41 (presumably an ISA
  // availability check; its definition is not visible here).

  // Batch size of exactly 16.
  TEST(F32_VELU__SSE41_RR2_P6_X16, batch_eq_16) {
    TEST_REQUIRES_X86_SSE41;
    VUnaryMicrokernelTester().batch_size(16).Test(
      xnn_f32_velu_ukernel__sse41_rr2_p6_x16, xnn_init_f32_elu_sse2_rr2_p6_params);
  }

  // Multiples of 16 from 32 through 144.
  TEST(F32_VELU__SSE41_RR2_P6_X16, batch_div_16) {
    TEST_REQUIRES_X86_SSE41;
    for (size_t multiple = 2; multiple < 10; ++multiple) {
      VUnaryMicrokernelTester().batch_size(multiple * 16).Test(
        xnn_f32_velu_ukernel__sse41_rr2_p6_x16, xnn_init_f32_elu_sse2_rr2_p6_params);
    }
  }

  // Every batch size from 1 through 15.
  TEST(F32_VELU__SSE41_RR2_P6_X16, batch_lt_16) {
    TEST_REQUIRES_X86_SSE41;
    for (size_t n = 1; n <= 15; ++n) {
      VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__sse41_rr2_p6_x16, xnn_init_f32_elu_sse2_rr2_p6_params);
    }
  }

  // Every batch size from 17 through 31.
  TEST(F32_VELU__SSE41_RR2_P6_X16, batch_gt_16) {
    TEST_REQUIRES_X86_SSE41;
    for (size_t n = 17; n <= 31; ++n) {
      VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__sse41_rr2_p6_x16, xnn_init_f32_elu_sse2_rr2_p6_params);
    }
  }

  // Tester's inplace option enabled; batch sizes 1 to 80 in steps of 15.
  TEST(F32_VELU__SSE41_RR2_P6_X16, inplace) {
    TEST_REQUIRES_X86_SSE41;
    for (size_t n = 1; n <= 80; n += 15) {
      VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_velu_ukernel__sse41_rr2_p6_x16, xnn_init_f32_elu_sse2_rr2_p6_params);
    }
  }

  // prescale of 0.1 and then 10, each over batch sizes 1 to 80 in steps of 15.
  TEST(F32_VELU__SSE41_RR2_P6_X16, prescale) {
    TEST_REQUIRES_X86_SSE41;
    for (const float prescale : {0.1f, 10.0f}) {
      for (size_t n = 1; n <= 80; n += 15) {
        VUnaryMicrokernelTester().batch_size(n).prescale(prescale).Test(
          xnn_f32_velu_ukernel__sse41_rr2_p6_x16, xnn_init_f32_elu_sse2_rr2_p6_params);
      }
    }
  }

  // alpha of 0.3 and then 3, each over batch sizes 1 to 80 in steps of 15.
  TEST(F32_VELU__SSE41_RR2_P6_X16, alpha) {
    TEST_REQUIRES_X86_SSE41;
    for (const float alpha : {0.3f, 3.0f}) {
      for (size_t n = 1; n <= 80; n += 15) {
        VUnaryMicrokernelTester().batch_size(n).alpha(alpha).Test(
          xnn_f32_velu_ukernel__sse41_rr2_p6_x16, xnn_init_f32_elu_sse2_rr2_p6_params);
      }
    }
  }

  // beta of 0.3 and then 3, each over batch sizes 1 to 80 in steps of 15.
  TEST(F32_VELU__SSE41_RR2_P6_X16, beta) {
    TEST_REQUIRES_X86_SSE41;
    for (const float beta : {0.3f, 3.0f}) {
      for (size_t n = 1; n <= 80; n += 15) {
        VUnaryMicrokernelTester().batch_size(n).beta(beta).Test(
          xnn_f32_velu_ukernel__sse41_rr2_p6_x16, xnn_init_f32_elu_sse2_rr2_p6_params);
      }
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
3836 
3837 
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // Tests for xnn_f32_velu_ukernel__sse41_rr2_p6_x20. Every case drives the
  // kernel through VUnaryMicrokernelTester with the sse2_rr2_p6 params
  // initializer, after invoking TEST_REQUIRES_X86_SSE41 (presumably an ISA
  // availability check; its definition is not visible here).

  // Batch size of exactly 20.
  TEST(F32_VELU__SSE41_RR2_P6_X20, batch_eq_20) {
    TEST_REQUIRES_X86_SSE41;
    VUnaryMicrokernelTester().batch_size(20).Test(
      xnn_f32_velu_ukernel__sse41_rr2_p6_x20, xnn_init_f32_elu_sse2_rr2_p6_params);
  }

  // Multiples of 20 from 40 through 180.
  TEST(F32_VELU__SSE41_RR2_P6_X20, batch_div_20) {
    TEST_REQUIRES_X86_SSE41;
    for (size_t multiple = 2; multiple < 10; ++multiple) {
      VUnaryMicrokernelTester().batch_size(multiple * 20).Test(
        xnn_f32_velu_ukernel__sse41_rr2_p6_x20, xnn_init_f32_elu_sse2_rr2_p6_params);
    }
  }

  // Every batch size from 1 through 19.
  TEST(F32_VELU__SSE41_RR2_P6_X20, batch_lt_20) {
    TEST_REQUIRES_X86_SSE41;
    for (size_t n = 1; n <= 19; ++n) {
      VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__sse41_rr2_p6_x20, xnn_init_f32_elu_sse2_rr2_p6_params);
    }
  }

  // Every batch size from 21 through 39.
  TEST(F32_VELU__SSE41_RR2_P6_X20, batch_gt_20) {
    TEST_REQUIRES_X86_SSE41;
    for (size_t n = 21; n <= 39; ++n) {
      VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__sse41_rr2_p6_x20, xnn_init_f32_elu_sse2_rr2_p6_params);
    }
  }

  // Tester's inplace option enabled; batch sizes 1 to 100 in steps of 19.
  TEST(F32_VELU__SSE41_RR2_P6_X20, inplace) {
    TEST_REQUIRES_X86_SSE41;
    for (size_t n = 1; n <= 100; n += 19) {
      VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_velu_ukernel__sse41_rr2_p6_x20, xnn_init_f32_elu_sse2_rr2_p6_params);
    }
  }

  // prescale of 0.1 and then 10, each over batch sizes 1 to 100 in steps of 19.
  TEST(F32_VELU__SSE41_RR2_P6_X20, prescale) {
    TEST_REQUIRES_X86_SSE41;
    for (const float prescale : {0.1f, 10.0f}) {
      for (size_t n = 1; n <= 100; n += 19) {
        VUnaryMicrokernelTester().batch_size(n).prescale(prescale).Test(
          xnn_f32_velu_ukernel__sse41_rr2_p6_x20, xnn_init_f32_elu_sse2_rr2_p6_params);
      }
    }
  }

  // alpha of 0.3 and then 3, each over batch sizes 1 to 100 in steps of 19.
  TEST(F32_VELU__SSE41_RR2_P6_X20, alpha) {
    TEST_REQUIRES_X86_SSE41;
    for (const float alpha : {0.3f, 3.0f}) {
      for (size_t n = 1; n <= 100; n += 19) {
        VUnaryMicrokernelTester().batch_size(n).alpha(alpha).Test(
          xnn_f32_velu_ukernel__sse41_rr2_p6_x20, xnn_init_f32_elu_sse2_rr2_p6_params);
      }
    }
  }

  // beta of 0.3 and then 3, each over batch sizes 1 to 100 in steps of 19.
  TEST(F32_VELU__SSE41_RR2_P6_X20, beta) {
    TEST_REQUIRES_X86_SSE41;
    for (const float beta : {0.3f, 3.0f}) {
      for (size_t n = 1; n <= 100; n += 19) {
        VUnaryMicrokernelTester().batch_size(n).beta(beta).Test(
          xnn_f32_velu_ukernel__sse41_rr2_p6_x20, xnn_init_f32_elu_sse2_rr2_p6_params);
      }
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
3919 
3920 
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // Tests for xnn_f32_velu_ukernel__sse41_rr2_p6_x24. Every case drives the
  // kernel through VUnaryMicrokernelTester with the sse2_rr2_p6 params
  // initializer, after invoking TEST_REQUIRES_X86_SSE41 (presumably an ISA
  // availability check; its definition is not visible here).

  // Batch size of exactly 24.
  TEST(F32_VELU__SSE41_RR2_P6_X24, batch_eq_24) {
    TEST_REQUIRES_X86_SSE41;
    VUnaryMicrokernelTester().batch_size(24).Test(
      xnn_f32_velu_ukernel__sse41_rr2_p6_x24, xnn_init_f32_elu_sse2_rr2_p6_params);
  }

  // Multiples of 24 from 48 through 216.
  TEST(F32_VELU__SSE41_RR2_P6_X24, batch_div_24) {
    TEST_REQUIRES_X86_SSE41;
    for (size_t multiple = 2; multiple < 10; ++multiple) {
      VUnaryMicrokernelTester().batch_size(multiple * 24).Test(
        xnn_f32_velu_ukernel__sse41_rr2_p6_x24, xnn_init_f32_elu_sse2_rr2_p6_params);
    }
  }

  // Every batch size from 1 through 23.
  TEST(F32_VELU__SSE41_RR2_P6_X24, batch_lt_24) {
    TEST_REQUIRES_X86_SSE41;
    for (size_t n = 1; n <= 23; ++n) {
      VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__sse41_rr2_p6_x24, xnn_init_f32_elu_sse2_rr2_p6_params);
    }
  }

  // Every batch size from 25 through 47.
  TEST(F32_VELU__SSE41_RR2_P6_X24, batch_gt_24) {
    TEST_REQUIRES_X86_SSE41;
    for (size_t n = 25; n <= 47; ++n) {
      VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__sse41_rr2_p6_x24, xnn_init_f32_elu_sse2_rr2_p6_params);
    }
  }

  // Tester's inplace option enabled; batch sizes 1 to 120 in steps of 23.
  TEST(F32_VELU__SSE41_RR2_P6_X24, inplace) {
    TEST_REQUIRES_X86_SSE41;
    for (size_t n = 1; n <= 120; n += 23) {
      VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_velu_ukernel__sse41_rr2_p6_x24, xnn_init_f32_elu_sse2_rr2_p6_params);
    }
  }

  // prescale of 0.1 and then 10, each over batch sizes 1 to 120 in steps of 23.
  TEST(F32_VELU__SSE41_RR2_P6_X24, prescale) {
    TEST_REQUIRES_X86_SSE41;
    for (const float prescale : {0.1f, 10.0f}) {
      for (size_t n = 1; n <= 120; n += 23) {
        VUnaryMicrokernelTester().batch_size(n).prescale(prescale).Test(
          xnn_f32_velu_ukernel__sse41_rr2_p6_x24, xnn_init_f32_elu_sse2_rr2_p6_params);
      }
    }
  }

  // alpha of 0.3 and then 3, each over batch sizes 1 to 120 in steps of 23.
  TEST(F32_VELU__SSE41_RR2_P6_X24, alpha) {
    TEST_REQUIRES_X86_SSE41;
    for (const float alpha : {0.3f, 3.0f}) {
      for (size_t n = 1; n <= 120; n += 23) {
        VUnaryMicrokernelTester().batch_size(n).alpha(alpha).Test(
          xnn_f32_velu_ukernel__sse41_rr2_p6_x24, xnn_init_f32_elu_sse2_rr2_p6_params);
      }
    }
  }

  // beta of 0.3 and then 3, each over batch sizes 1 to 120 in steps of 23.
  TEST(F32_VELU__SSE41_RR2_P6_X24, beta) {
    TEST_REQUIRES_X86_SSE41;
    for (const float beta : {0.3f, 3.0f}) {
      for (size_t n = 1; n <= 120; n += 23) {
        VUnaryMicrokernelTester().batch_size(n).beta(beta).Test(
          xnn_f32_velu_ukernel__sse41_rr2_p6_x24, xnn_init_f32_elu_sse2_rr2_p6_params);
      }
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
4002 
4003 
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // Tests for xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x8. Every case drives
  // the kernel through VUnaryMicrokernelTester with the avx_rr2_lut4_p4 params
  // initializer, after invoking TEST_REQUIRES_X86_AVX (presumably an ISA
  // availability check; its definition is not visible here).

  // Batch size of exactly 8.
  TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X8, batch_eq_8) {
    TEST_REQUIRES_X86_AVX;
    VUnaryMicrokernelTester().batch_size(8).Test(
      xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x8, xnn_init_f32_elu_avx_rr2_lut4_p4_params);
  }

  // Multiples of 8 from 16 through 72.
  TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X8, batch_div_8) {
    TEST_REQUIRES_X86_AVX;
    for (size_t multiple = 2; multiple < 10; ++multiple) {
      VUnaryMicrokernelTester().batch_size(multiple * 8).Test(
        xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x8, xnn_init_f32_elu_avx_rr2_lut4_p4_params);
    }
  }

  // Every batch size from 1 through 7.
  TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X8, batch_lt_8) {
    TEST_REQUIRES_X86_AVX;
    for (size_t n = 1; n <= 7; ++n) {
      VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x8, xnn_init_f32_elu_avx_rr2_lut4_p4_params);
    }
  }

  // Every batch size from 9 through 15.
  TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X8, batch_gt_8) {
    TEST_REQUIRES_X86_AVX;
    for (size_t n = 9; n <= 15; ++n) {
      VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x8, xnn_init_f32_elu_avx_rr2_lut4_p4_params);
    }
  }

  // Tester's inplace option enabled; batch sizes 1 to 40 in steps of 7.
  TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X8, inplace) {
    TEST_REQUIRES_X86_AVX;
    for (size_t n = 1; n <= 40; n += 7) {
      VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x8, xnn_init_f32_elu_avx_rr2_lut4_p4_params);
    }
  }

  // prescale of 0.1 and then 10, each over batch sizes 1 to 40 in steps of 7.
  TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X8, prescale) {
    TEST_REQUIRES_X86_AVX;
    for (const float prescale : {0.1f, 10.0f}) {
      for (size_t n = 1; n <= 40; n += 7) {
        VUnaryMicrokernelTester().batch_size(n).prescale(prescale).Test(
          xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x8, xnn_init_f32_elu_avx_rr2_lut4_p4_params);
      }
    }
  }

  // alpha of 0.3 and then 3, each over batch sizes 1 to 40 in steps of 7.
  TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X8, alpha) {
    TEST_REQUIRES_X86_AVX;
    for (const float alpha : {0.3f, 3.0f}) {
      for (size_t n = 1; n <= 40; n += 7) {
        VUnaryMicrokernelTester().batch_size(n).alpha(alpha).Test(
          xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x8, xnn_init_f32_elu_avx_rr2_lut4_p4_params);
      }
    }
  }

  // beta of 0.3 and then 3, each over batch sizes 1 to 40 in steps of 7.
  TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X8, beta) {
    TEST_REQUIRES_X86_AVX;
    for (const float beta : {0.3f, 3.0f}) {
      for (size_t n = 1; n <= 40; n += 7) {
        VUnaryMicrokernelTester().batch_size(n).beta(beta).Test(
          xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x8, xnn_init_f32_elu_avx_rr2_lut4_p4_params);
      }
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
4085 
4086 
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // Tests for xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x16. Every case drives
  // the kernel through VUnaryMicrokernelTester with the avx_rr2_lut4_p4 params
  // initializer, after invoking TEST_REQUIRES_X86_AVX (presumably an ISA
  // availability check; its definition is not visible here).

  // Batch size of exactly 16.
  TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X16, batch_eq_16) {
    TEST_REQUIRES_X86_AVX;
    VUnaryMicrokernelTester().batch_size(16).Test(
      xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x16, xnn_init_f32_elu_avx_rr2_lut4_p4_params);
  }

  // Multiples of 16 from 32 through 144.
  TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X16, batch_div_16) {
    TEST_REQUIRES_X86_AVX;
    for (size_t multiple = 2; multiple < 10; ++multiple) {
      VUnaryMicrokernelTester().batch_size(multiple * 16).Test(
        xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x16, xnn_init_f32_elu_avx_rr2_lut4_p4_params);
    }
  }

  // Every batch size from 1 through 15.
  TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X16, batch_lt_16) {
    TEST_REQUIRES_X86_AVX;
    for (size_t n = 1; n <= 15; ++n) {
      VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x16, xnn_init_f32_elu_avx_rr2_lut4_p4_params);
    }
  }

  // Every batch size from 17 through 31.
  TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X16, batch_gt_16) {
    TEST_REQUIRES_X86_AVX;
    for (size_t n = 17; n <= 31; ++n) {
      VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x16, xnn_init_f32_elu_avx_rr2_lut4_p4_params);
    }
  }

  // Tester's inplace option enabled; batch sizes 1 to 80 in steps of 15.
  TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X16, inplace) {
    TEST_REQUIRES_X86_AVX;
    for (size_t n = 1; n <= 80; n += 15) {
      VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x16, xnn_init_f32_elu_avx_rr2_lut4_p4_params);
    }
  }

  // prescale of 0.1 and then 10, each over batch sizes 1 to 80 in steps of 15.
  TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X16, prescale) {
    TEST_REQUIRES_X86_AVX;
    for (const float prescale : {0.1f, 10.0f}) {
      for (size_t n = 1; n <= 80; n += 15) {
        VUnaryMicrokernelTester().batch_size(n).prescale(prescale).Test(
          xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x16, xnn_init_f32_elu_avx_rr2_lut4_p4_params);
      }
    }
  }

  // alpha of 0.3 and then 3, each over batch sizes 1 to 80 in steps of 15.
  TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X16, alpha) {
    TEST_REQUIRES_X86_AVX;
    for (const float alpha : {0.3f, 3.0f}) {
      for (size_t n = 1; n <= 80; n += 15) {
        VUnaryMicrokernelTester().batch_size(n).alpha(alpha).Test(
          xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x16, xnn_init_f32_elu_avx_rr2_lut4_p4_params);
      }
    }
  }

  // beta of 0.3 and then 3, each over batch sizes 1 to 80 in steps of 15.
  TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X16, beta) {
    TEST_REQUIRES_X86_AVX;
    for (const float beta : {0.3f, 3.0f}) {
      for (size_t n = 1; n <= 80; n += 15) {
        VUnaryMicrokernelTester().batch_size(n).beta(beta).Test(
          xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x16, xnn_init_f32_elu_avx_rr2_lut4_p4_params);
      }
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
4168 
4169 
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // Tests for xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x24. Every case drives
  // the kernel through VUnaryMicrokernelTester with the avx_rr2_lut4_p4 params
  // initializer, after invoking TEST_REQUIRES_X86_AVX (presumably an ISA
  // availability check; its definition is not visible here).

  // Batch size of exactly 24.
  TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X24, batch_eq_24) {
    TEST_REQUIRES_X86_AVX;
    VUnaryMicrokernelTester().batch_size(24).Test(
      xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x24, xnn_init_f32_elu_avx_rr2_lut4_p4_params);
  }

  // Multiples of 24 from 48 through 216.
  TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X24, batch_div_24) {
    TEST_REQUIRES_X86_AVX;
    for (size_t multiple = 2; multiple < 10; ++multiple) {
      VUnaryMicrokernelTester().batch_size(multiple * 24).Test(
        xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x24, xnn_init_f32_elu_avx_rr2_lut4_p4_params);
    }
  }

  // Every batch size from 1 through 23.
  TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X24, batch_lt_24) {
    TEST_REQUIRES_X86_AVX;
    for (size_t n = 1; n <= 23; ++n) {
      VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x24, xnn_init_f32_elu_avx_rr2_lut4_p4_params);
    }
  }

  // Every batch size from 25 through 47.
  TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X24, batch_gt_24) {
    TEST_REQUIRES_X86_AVX;
    for (size_t n = 25; n <= 47; ++n) {
      VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x24, xnn_init_f32_elu_avx_rr2_lut4_p4_params);
    }
  }

  // Tester's inplace option enabled; batch sizes 1 to 120 in steps of 23.
  TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X24, inplace) {
    TEST_REQUIRES_X86_AVX;
    for (size_t n = 1; n <= 120; n += 23) {
      VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x24, xnn_init_f32_elu_avx_rr2_lut4_p4_params);
    }
  }

  // prescale of 0.1 and then 10, each over batch sizes 1 to 120 in steps of 23.
  TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X24, prescale) {
    TEST_REQUIRES_X86_AVX;
    for (const float prescale : {0.1f, 10.0f}) {
      for (size_t n = 1; n <= 120; n += 23) {
        VUnaryMicrokernelTester().batch_size(n).prescale(prescale).Test(
          xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x24, xnn_init_f32_elu_avx_rr2_lut4_p4_params);
      }
    }
  }

  // alpha of 0.3 and then 3, each over batch sizes 1 to 120 in steps of 23.
  TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X24, alpha) {
    TEST_REQUIRES_X86_AVX;
    for (const float alpha : {0.3f, 3.0f}) {
      for (size_t n = 1; n <= 120; n += 23) {
        VUnaryMicrokernelTester().batch_size(n).alpha(alpha).Test(
          xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x24, xnn_init_f32_elu_avx_rr2_lut4_p4_params);
      }
    }
  }

  // beta of 0.3 and then 3, each over batch sizes 1 to 120 in steps of 23.
  TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X24, beta) {
    TEST_REQUIRES_X86_AVX;
    for (const float beta : {0.3f, 3.0f}) {
      for (size_t n = 1; n <= 120; n += 23) {
        VUnaryMicrokernelTester().batch_size(n).beta(beta).Test(
          xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x24, xnn_init_f32_elu_avx_rr2_lut4_p4_params);
      }
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
4251 
4252 
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // Tests for xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x32. Every case drives
  // the kernel through VUnaryMicrokernelTester with the avx_rr2_lut4_p4 params
  // initializer, after invoking TEST_REQUIRES_X86_AVX (presumably an ISA
  // availability check; its definition is not visible here).

  // Batch size of exactly 32.
  TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X32, batch_eq_32) {
    TEST_REQUIRES_X86_AVX;
    VUnaryMicrokernelTester().batch_size(32).Test(
      xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x32, xnn_init_f32_elu_avx_rr2_lut4_p4_params);
  }

  // Multiples of 32 from 64 through 288.
  TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X32, batch_div_32) {
    TEST_REQUIRES_X86_AVX;
    for (size_t multiple = 2; multiple < 10; ++multiple) {
      VUnaryMicrokernelTester().batch_size(multiple * 32).Test(
        xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x32, xnn_init_f32_elu_avx_rr2_lut4_p4_params);
    }
  }

  // Every batch size from 1 through 31.
  TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X32, batch_lt_32) {
    TEST_REQUIRES_X86_AVX;
    for (size_t n = 1; n <= 31; ++n) {
      VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x32, xnn_init_f32_elu_avx_rr2_lut4_p4_params);
    }
  }

  // Every batch size from 33 through 63.
  TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X32, batch_gt_32) {
    TEST_REQUIRES_X86_AVX;
    for (size_t n = 33; n <= 63; ++n) {
      VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x32, xnn_init_f32_elu_avx_rr2_lut4_p4_params);
    }
  }

  // Tester's inplace option enabled; batch sizes 1 to 160 in steps of 31.
  TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X32, inplace) {
    TEST_REQUIRES_X86_AVX;
    for (size_t n = 1; n <= 160; n += 31) {
      VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x32, xnn_init_f32_elu_avx_rr2_lut4_p4_params);
    }
  }

  // prescale of 0.1 and then 10, each over batch sizes 1 to 160 in steps of 31.
  TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X32, prescale) {
    TEST_REQUIRES_X86_AVX;
    for (const float prescale : {0.1f, 10.0f}) {
      for (size_t n = 1; n <= 160; n += 31) {
        VUnaryMicrokernelTester().batch_size(n).prescale(prescale).Test(
          xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x32, xnn_init_f32_elu_avx_rr2_lut4_p4_params);
      }
    }
  }

  // alpha of 0.3 and then 3, each over batch sizes 1 to 160 in steps of 31.
  TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X32, alpha) {
    TEST_REQUIRES_X86_AVX;
    for (const float alpha : {0.3f, 3.0f}) {
      for (size_t n = 1; n <= 160; n += 31) {
        VUnaryMicrokernelTester().batch_size(n).alpha(alpha).Test(
          xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x32, xnn_init_f32_elu_avx_rr2_lut4_p4_params);
      }
    }
  }

  // beta of 0.3 and then 3, each over batch sizes 1 to 160 in steps of 31.
  TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X32, beta) {
    TEST_REQUIRES_X86_AVX;
    for (const float beta : {0.3f, 3.0f}) {
      for (size_t n = 1; n <= 160; n += 31) {
        VUnaryMicrokernelTester().batch_size(n).beta(beta).Test(
          xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x32, xnn_init_f32_elu_avx_rr2_lut4_p4_params);
      }
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
4334 
4335 
4336 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X40,batch_eq_40)4337   TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X40, batch_eq_40) {
4338     TEST_REQUIRES_X86_AVX;
4339     VUnaryMicrokernelTester()
4340       .batch_size(40)
4341       .Test(xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x40, xnn_init_f32_elu_avx_rr2_lut4_p4_params);
4342   }
4343 
TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X40,batch_div_40)4344   TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X40, batch_div_40) {
4345     TEST_REQUIRES_X86_AVX;
4346     for (size_t batch_size = 80; batch_size < 400; batch_size += 40) {
4347       VUnaryMicrokernelTester()
4348         .batch_size(batch_size)
4349         .Test(xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x40, xnn_init_f32_elu_avx_rr2_lut4_p4_params);
4350     }
4351   }
4352 
TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X40,batch_lt_40)4353   TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X40, batch_lt_40) {
4354     TEST_REQUIRES_X86_AVX;
4355     for (size_t batch_size = 1; batch_size < 40; batch_size++) {
4356       VUnaryMicrokernelTester()
4357         .batch_size(batch_size)
4358         .Test(xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x40, xnn_init_f32_elu_avx_rr2_lut4_p4_params);
4359     }
4360   }
4361 
TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X40,batch_gt_40)4362   TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X40, batch_gt_40) {
4363     TEST_REQUIRES_X86_AVX;
4364     for (size_t batch_size = 41; batch_size < 80; batch_size++) {
4365       VUnaryMicrokernelTester()
4366         .batch_size(batch_size)
4367         .Test(xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x40, xnn_init_f32_elu_avx_rr2_lut4_p4_params);
4368     }
4369   }
4370 
// Runs the kernel with the tester's inplace(true) option over batch sizes
// 1, 40, 79, 118, 157, 196 (step 39).
// NOTE(review): inplace presumably makes input and output share one buffer;
// confirm in vunary-microkernel-tester.h.
TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X40, inplace) {
  TEST_REQUIRES_X86_AVX;
  for (size_t batch_size = 1; batch_size <= 200; batch_size += 39) {
    VUnaryMicrokernelTester()
      .batch_size(batch_size)
      .inplace(true)
      .Test(xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x40, xnn_init_f32_elu_avx_rr2_lut4_p4_params);
  }
}
4380 
// Runs the kernel with prescale set to 0.1 and then 10, each over batch sizes
// 1, 40, 79, 118, 157, 196 (step 39). No other tester option is overridden.
TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X40, prescale) {
  TEST_REQUIRES_X86_AVX;
  for (float prescale : std::vector<float>({0.1f, 10.0f})) {
    for (size_t batch_size = 1; batch_size <= 200; batch_size += 39) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .prescale(prescale)
        .Test(xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x40, xnn_init_f32_elu_avx_rr2_lut4_p4_params);
    }
  }
}
4392 
// Runs the kernel with alpha set to 0.3 and then 3, each over batch sizes
// 1, 40, 79, 118, 157, 196 (step 39). No other tester option is overridden.
TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X40, alpha) {
  TEST_REQUIRES_X86_AVX;
  for (float alpha : std::vector<float>({0.3f, 3.0f})) {
    for (size_t batch_size = 1; batch_size <= 200; batch_size += 39) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .alpha(alpha)
        .Test(xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x40, xnn_init_f32_elu_avx_rr2_lut4_p4_params);
    }
  }
}
4404 
// Runs the kernel with beta set to 0.3 and then 3, each over batch sizes
// 1, 40, 79, 118, 157, 196 (step 39). No other tester option is overridden.
TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X40, beta) {
  TEST_REQUIRES_X86_AVX;
  for (float beta : std::vector<float>({0.3f, 3.0f})) {
    for (size_t batch_size = 1; batch_size <= 200; batch_size += 39) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .beta(beta)
        .Test(xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x40, xnn_init_f32_elu_avx_rr2_lut4_p4_params);
    }
  }
}
4416 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
4417 
4418 
4419 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the kernel once with a batch size of exactly 48, the number in its
// `_x48` suffix (presumably the per-iteration tile size; confirm in the kernel).
// NOTE(review): TEST_REQUIRES_X86_AVX presumably skips the test on CPUs without
// AVX; confirm in xnnpack/isa-checks.h.
TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X48, batch_eq_48) {
  TEST_REQUIRES_X86_AVX;
  VUnaryMicrokernelTester()
    .batch_size(48)
    .Test(xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x48, xnn_init_f32_elu_avx_rr2_lut4_p4_params);
}
4426 
// Runs the kernel over batch sizes 96, 144, ..., 432: multiples of 48 from 2x
// up to (not including) 10x, so every size divides evenly by 48.
TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X48, batch_div_48) {
  TEST_REQUIRES_X86_AVX;
  for (size_t batch_size = 96; batch_size < 480; batch_size += 48) {
    VUnaryMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x48, xnn_init_f32_elu_avx_rr2_lut4_p4_params);
  }
}
4435 
// Runs the kernel over every batch size from 1 through 47, i.e. all sizes
// below 48.
TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X48, batch_lt_48) {
  TEST_REQUIRES_X86_AVX;
  for (size_t batch_size = 1; batch_size < 48; batch_size++) {
    VUnaryMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x48, xnn_init_f32_elu_avx_rr2_lut4_p4_params);
  }
}
4444 
// Runs the kernel over every batch size from 49 through 95, i.e. strictly
// between 48 and 96.
TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X48, batch_gt_48) {
  TEST_REQUIRES_X86_AVX;
  for (size_t batch_size = 49; batch_size < 96; batch_size++) {
    VUnaryMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x48, xnn_init_f32_elu_avx_rr2_lut4_p4_params);
  }
}
4453 
// Runs the kernel with the tester's inplace(true) option over batch sizes
// 1, 48, 95, 142, 189, 236 (step 47).
// NOTE(review): inplace presumably makes input and output share one buffer;
// confirm in vunary-microkernel-tester.h.
TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X48, inplace) {
  TEST_REQUIRES_X86_AVX;
  for (size_t batch_size = 1; batch_size <= 240; batch_size += 47) {
    VUnaryMicrokernelTester()
      .batch_size(batch_size)
      .inplace(true)
      .Test(xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x48, xnn_init_f32_elu_avx_rr2_lut4_p4_params);
  }
}
4463 
// Runs the kernel with prescale set to 0.1 and then 10, each over batch sizes
// 1, 48, 95, 142, 189, 236 (step 47). No other tester option is overridden.
TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X48, prescale) {
  TEST_REQUIRES_X86_AVX;
  for (float prescale : std::vector<float>({0.1f, 10.0f})) {
    for (size_t batch_size = 1; batch_size <= 240; batch_size += 47) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .prescale(prescale)
        .Test(xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x48, xnn_init_f32_elu_avx_rr2_lut4_p4_params);
    }
  }
}
4475 
// Runs the kernel with alpha set to 0.3 and then 3, each over batch sizes
// 1, 48, 95, 142, 189, 236 (step 47). No other tester option is overridden.
TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X48, alpha) {
  TEST_REQUIRES_X86_AVX;
  for (float alpha : std::vector<float>({0.3f, 3.0f})) {
    for (size_t batch_size = 1; batch_size <= 240; batch_size += 47) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .alpha(alpha)
        .Test(xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x48, xnn_init_f32_elu_avx_rr2_lut4_p4_params);
    }
  }
}
4487 
// Runs the kernel with beta set to 0.3 and then 3, each over batch sizes
// 1, 48, 95, 142, 189, 236 (step 47). No other tester option is overridden.
TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X48, beta) {
  TEST_REQUIRES_X86_AVX;
  for (float beta : std::vector<float>({0.3f, 3.0f})) {
    for (size_t batch_size = 1; batch_size <= 240; batch_size += 47) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .beta(beta)
        .Test(xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x48, xnn_init_f32_elu_avx_rr2_lut4_p4_params);
    }
  }
}
4499 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
4500 
4501 
4502 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the kernel once with a batch size of exactly 8, the number in its
// `_x8` suffix (presumably the per-iteration tile size; confirm in the kernel).
// NOTE(review): TEST_REQUIRES_X86_AVX presumably skips the test on CPUs without
// AVX; confirm in xnnpack/isa-checks.h.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X8, batch_eq_8) {
  TEST_REQUIRES_X86_AVX;
  VUnaryMicrokernelTester()
    .batch_size(8)
    .Test(xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x8, xnn_init_f32_elu_avx_rr2_lut16_p3_params);
}
4509 
// Runs the kernel over batch sizes 16, 24, ..., 72: multiples of 8 from 2x
// up to (not including) 10x, so every size divides evenly by 8.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X8, batch_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t batch_size = 16; batch_size < 80; batch_size += 8) {
    VUnaryMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x8, xnn_init_f32_elu_avx_rr2_lut16_p3_params);
  }
}
4518 
// Runs the kernel over every batch size from 1 through 7, i.e. all sizes
// below 8.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X8, batch_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t batch_size = 1; batch_size < 8; batch_size++) {
    VUnaryMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x8, xnn_init_f32_elu_avx_rr2_lut16_p3_params);
  }
}
4527 
// Runs the kernel over every batch size from 9 through 15, i.e. strictly
// between 8 and 16.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X8, batch_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t batch_size = 9; batch_size < 16; batch_size++) {
    VUnaryMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x8, xnn_init_f32_elu_avx_rr2_lut16_p3_params);
  }
}
4536 
// Runs the kernel with the tester's inplace(true) option over batch sizes
// 1, 8, 15, 22, 29, 36 (step 7).
// NOTE(review): inplace presumably makes input and output share one buffer;
// confirm in vunary-microkernel-tester.h.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X8, inplace) {
  TEST_REQUIRES_X86_AVX;
  for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
    VUnaryMicrokernelTester()
      .batch_size(batch_size)
      .inplace(true)
      .Test(xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x8, xnn_init_f32_elu_avx_rr2_lut16_p3_params);
  }
}
4546 
// Runs the kernel with prescale set to 0.1 and then 10, each over batch sizes
// 1, 8, 15, 22, 29, 36 (step 7). No other tester option is overridden.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X8, prescale) {
  TEST_REQUIRES_X86_AVX;
  for (float prescale : std::vector<float>({0.1f, 10.0f})) {
    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .prescale(prescale)
        .Test(xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x8, xnn_init_f32_elu_avx_rr2_lut16_p3_params);
    }
  }
}
4558 
// Runs the kernel with alpha set to 0.3 and then 3, each over batch sizes
// 1, 8, 15, 22, 29, 36 (step 7). No other tester option is overridden.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X8, alpha) {
  TEST_REQUIRES_X86_AVX;
  for (float alpha : std::vector<float>({0.3f, 3.0f})) {
    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .alpha(alpha)
        .Test(xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x8, xnn_init_f32_elu_avx_rr2_lut16_p3_params);
    }
  }
}
4570 
// Runs the kernel with beta set to 0.3 and then 3, each over batch sizes
// 1, 8, 15, 22, 29, 36 (step 7). No other tester option is overridden.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X8, beta) {
  TEST_REQUIRES_X86_AVX;
  for (float beta : std::vector<float>({0.3f, 3.0f})) {
    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .beta(beta)
        .Test(xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x8, xnn_init_f32_elu_avx_rr2_lut16_p3_params);
    }
  }
}
4582 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
4583 
4584 
4585 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the kernel once with a batch size of exactly 16, the number in its
// `_x16` suffix (presumably the per-iteration tile size; confirm in the kernel).
// NOTE(review): TEST_REQUIRES_X86_AVX presumably skips the test on CPUs without
// AVX; confirm in xnnpack/isa-checks.h.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X16, batch_eq_16) {
  TEST_REQUIRES_X86_AVX;
  VUnaryMicrokernelTester()
    .batch_size(16)
    .Test(xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x16, xnn_init_f32_elu_avx_rr2_lut16_p3_params);
}
4592 
// Runs the kernel over batch sizes 32, 48, ..., 144: multiples of 16 from 2x
// up to (not including) 10x, so every size divides evenly by 16.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X16, batch_div_16) {
  TEST_REQUIRES_X86_AVX;
  for (size_t batch_size = 32; batch_size < 160; batch_size += 16) {
    VUnaryMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x16, xnn_init_f32_elu_avx_rr2_lut16_p3_params);
  }
}
4601 
// Runs the kernel over every batch size from 1 through 15, i.e. all sizes
// below 16.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X16, batch_lt_16) {
  TEST_REQUIRES_X86_AVX;
  for (size_t batch_size = 1; batch_size < 16; batch_size++) {
    VUnaryMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x16, xnn_init_f32_elu_avx_rr2_lut16_p3_params);
  }
}
4610 
// Runs the kernel over every batch size from 17 through 31, i.e. strictly
// between 16 and 32.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X16, batch_gt_16) {
  TEST_REQUIRES_X86_AVX;
  for (size_t batch_size = 17; batch_size < 32; batch_size++) {
    VUnaryMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x16, xnn_init_f32_elu_avx_rr2_lut16_p3_params);
  }
}
4619 
// Runs the kernel with the tester's inplace(true) option over batch sizes
// 1, 16, 31, 46, 61, 76 (step 15).
// NOTE(review): inplace presumably makes input and output share one buffer;
// confirm in vunary-microkernel-tester.h.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X16, inplace) {
  TEST_REQUIRES_X86_AVX;
  for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
    VUnaryMicrokernelTester()
      .batch_size(batch_size)
      .inplace(true)
      .Test(xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x16, xnn_init_f32_elu_avx_rr2_lut16_p3_params);
  }
}
4629 
// Runs the kernel with prescale set to 0.1 and then 10, each over batch sizes
// 1, 16, 31, 46, 61, 76 (step 15). No other tester option is overridden.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X16, prescale) {
  TEST_REQUIRES_X86_AVX;
  for (float prescale : std::vector<float>({0.1f, 10.0f})) {
    for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .prescale(prescale)
        .Test(xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x16, xnn_init_f32_elu_avx_rr2_lut16_p3_params);
    }
  }
}
4641 
// Runs the kernel with alpha set to 0.3 and then 3, each over batch sizes
// 1, 16, 31, 46, 61, 76 (step 15). No other tester option is overridden.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X16, alpha) {
  TEST_REQUIRES_X86_AVX;
  for (float alpha : std::vector<float>({0.3f, 3.0f})) {
    for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .alpha(alpha)
        .Test(xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x16, xnn_init_f32_elu_avx_rr2_lut16_p3_params);
    }
  }
}
4653 
// Runs the kernel with beta set to 0.3 and then 3, each over batch sizes
// 1, 16, 31, 46, 61, 76 (step 15). No other tester option is overridden.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X16, beta) {
  TEST_REQUIRES_X86_AVX;
  for (float beta : std::vector<float>({0.3f, 3.0f})) {
    for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .beta(beta)
        .Test(xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x16, xnn_init_f32_elu_avx_rr2_lut16_p3_params);
    }
  }
}
4665 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
4666 
4667 
4668 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the kernel once with a batch size of exactly 24, the number in its
// `_x24` suffix (presumably the per-iteration tile size; confirm in the kernel).
// NOTE(review): TEST_REQUIRES_X86_AVX presumably skips the test on CPUs without
// AVX; confirm in xnnpack/isa-checks.h.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X24, batch_eq_24) {
  TEST_REQUIRES_X86_AVX;
  VUnaryMicrokernelTester()
    .batch_size(24)
    .Test(xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x24, xnn_init_f32_elu_avx_rr2_lut16_p3_params);
}
4675 
// Runs the kernel over batch sizes 48, 72, ..., 216: multiples of 24 from 2x
// up to (not including) 10x, so every size divides evenly by 24.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X24, batch_div_24) {
  TEST_REQUIRES_X86_AVX;
  for (size_t batch_size = 48; batch_size < 240; batch_size += 24) {
    VUnaryMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x24, xnn_init_f32_elu_avx_rr2_lut16_p3_params);
  }
}
4684 
// Runs the kernel over every batch size from 1 through 23, i.e. all sizes
// below 24.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X24, batch_lt_24) {
  TEST_REQUIRES_X86_AVX;
  for (size_t batch_size = 1; batch_size < 24; batch_size++) {
    VUnaryMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x24, xnn_init_f32_elu_avx_rr2_lut16_p3_params);
  }
}
4693 
// Runs the kernel over every batch size from 25 through 47, i.e. strictly
// between 24 and 48.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X24, batch_gt_24) {
  TEST_REQUIRES_X86_AVX;
  for (size_t batch_size = 25; batch_size < 48; batch_size++) {
    VUnaryMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x24, xnn_init_f32_elu_avx_rr2_lut16_p3_params);
  }
}
4702 
// Runs the kernel with the tester's inplace(true) option over batch sizes
// 1, 24, 47, 70, 93, 116 (step 23).
// NOTE(review): inplace presumably makes input and output share one buffer;
// confirm in vunary-microkernel-tester.h.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X24, inplace) {
  TEST_REQUIRES_X86_AVX;
  for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
    VUnaryMicrokernelTester()
      .batch_size(batch_size)
      .inplace(true)
      .Test(xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x24, xnn_init_f32_elu_avx_rr2_lut16_p3_params);
  }
}
4712 
// Runs the kernel with prescale set to 0.1 and then 10, each over batch sizes
// 1, 24, 47, 70, 93, 116 (step 23). No other tester option is overridden.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X24, prescale) {
  TEST_REQUIRES_X86_AVX;
  for (float prescale : std::vector<float>({0.1f, 10.0f})) {
    for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .prescale(prescale)
        .Test(xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x24, xnn_init_f32_elu_avx_rr2_lut16_p3_params);
    }
  }
}
4724 
// Runs the kernel with alpha set to 0.3 and then 3, each over batch sizes
// 1, 24, 47, 70, 93, 116 (step 23). No other tester option is overridden.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X24, alpha) {
  TEST_REQUIRES_X86_AVX;
  for (float alpha : std::vector<float>({0.3f, 3.0f})) {
    for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .alpha(alpha)
        .Test(xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x24, xnn_init_f32_elu_avx_rr2_lut16_p3_params);
    }
  }
}
4736 
// Runs the kernel with beta set to 0.3 and then 3, each over batch sizes
// 1, 24, 47, 70, 93, 116 (step 23). No other tester option is overridden.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X24, beta) {
  TEST_REQUIRES_X86_AVX;
  for (float beta : std::vector<float>({0.3f, 3.0f})) {
    for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .beta(beta)
        .Test(xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x24, xnn_init_f32_elu_avx_rr2_lut16_p3_params);
    }
  }
}
4748 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
4749 
4750 
4751 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the kernel once with a batch size of exactly 32, the number in its
// `_x32` suffix (presumably the per-iteration tile size; confirm in the kernel).
// NOTE(review): TEST_REQUIRES_X86_AVX presumably skips the test on CPUs without
// AVX; confirm in xnnpack/isa-checks.h.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X32, batch_eq_32) {
  TEST_REQUIRES_X86_AVX;
  VUnaryMicrokernelTester()
    .batch_size(32)
    .Test(xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x32, xnn_init_f32_elu_avx_rr2_lut16_p3_params);
}
4758 
// Runs the kernel over batch sizes 64, 96, ..., 288: multiples of 32 from 2x
// up to (not including) 10x, so every size divides evenly by 32.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X32, batch_div_32) {
  TEST_REQUIRES_X86_AVX;
  for (size_t batch_size = 64; batch_size < 320; batch_size += 32) {
    VUnaryMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x32, xnn_init_f32_elu_avx_rr2_lut16_p3_params);
  }
}
4767 
// Runs the kernel over every batch size from 1 through 31, i.e. all sizes
// below 32.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X32, batch_lt_32) {
  TEST_REQUIRES_X86_AVX;
  for (size_t batch_size = 1; batch_size < 32; batch_size++) {
    VUnaryMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x32, xnn_init_f32_elu_avx_rr2_lut16_p3_params);
  }
}
4776 
// Runs the kernel over every batch size from 33 through 63, i.e. strictly
// between 32 and 64.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X32, batch_gt_32) {
  TEST_REQUIRES_X86_AVX;
  for (size_t batch_size = 33; batch_size < 64; batch_size++) {
    VUnaryMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x32, xnn_init_f32_elu_avx_rr2_lut16_p3_params);
  }
}
4785 
// Runs the kernel with the tester's inplace(true) option over batch sizes
// 1, 32, 63, 94, 125, 156 (step 31).
// NOTE(review): inplace presumably makes input and output share one buffer;
// confirm in vunary-microkernel-tester.h.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X32, inplace) {
  TEST_REQUIRES_X86_AVX;
  for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
    VUnaryMicrokernelTester()
      .batch_size(batch_size)
      .inplace(true)
      .Test(xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x32, xnn_init_f32_elu_avx_rr2_lut16_p3_params);
  }
}
4795 
// Runs the kernel with prescale set to 0.1 and then 10, each over batch sizes
// 1, 32, 63, 94, 125, 156 (step 31). No other tester option is overridden.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X32, prescale) {
  TEST_REQUIRES_X86_AVX;
  for (float prescale : std::vector<float>({0.1f, 10.0f})) {
    for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .prescale(prescale)
        .Test(xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x32, xnn_init_f32_elu_avx_rr2_lut16_p3_params);
    }
  }
}
4807 
// Runs the kernel with alpha set to 0.3 and then 3, each over batch sizes
// 1, 32, 63, 94, 125, 156 (step 31). No other tester option is overridden.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X32, alpha) {
  TEST_REQUIRES_X86_AVX;
  for (float alpha : std::vector<float>({0.3f, 3.0f})) {
    for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .alpha(alpha)
        .Test(xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x32, xnn_init_f32_elu_avx_rr2_lut16_p3_params);
    }
  }
}
4819 
// Runs the kernel with beta set to 0.3 and then 3, each over batch sizes
// 1, 32, 63, 94, 125, 156 (step 31). No other tester option is overridden.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X32, beta) {
  TEST_REQUIRES_X86_AVX;
  for (float beta : std::vector<float>({0.3f, 3.0f})) {
    for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .beta(beta)
        .Test(xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x32, xnn_init_f32_elu_avx_rr2_lut16_p3_params);
    }
  }
}
4831 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
4832 
4833 
4834 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the kernel once with a batch size of exactly 40, the number in its
// `_x40` suffix (presumably the per-iteration tile size; confirm in the kernel).
// NOTE(review): TEST_REQUIRES_X86_AVX presumably skips the test on CPUs without
// AVX; confirm in xnnpack/isa-checks.h.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X40, batch_eq_40) {
  TEST_REQUIRES_X86_AVX;
  VUnaryMicrokernelTester()
    .batch_size(40)
    .Test(xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x40, xnn_init_f32_elu_avx_rr2_lut16_p3_params);
}
4841 
// Runs the kernel over batch sizes 80, 120, ..., 360: multiples of 40 from 2x
// up to (not including) 10x, so every size divides evenly by 40.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X40, batch_div_40) {
  TEST_REQUIRES_X86_AVX;
  for (size_t batch_size = 80; batch_size < 400; batch_size += 40) {
    VUnaryMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x40, xnn_init_f32_elu_avx_rr2_lut16_p3_params);
  }
}
4850 
// Runs the kernel over every batch size from 1 through 39, i.e. all sizes
// below 40.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X40, batch_lt_40) {
  TEST_REQUIRES_X86_AVX;
  for (size_t batch_size = 1; batch_size < 40; batch_size++) {
    VUnaryMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x40, xnn_init_f32_elu_avx_rr2_lut16_p3_params);
  }
}
4859 
// Runs the kernel over every batch size from 41 through 79, i.e. strictly
// between 40 and 80.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X40, batch_gt_40) {
  TEST_REQUIRES_X86_AVX;
  for (size_t batch_size = 41; batch_size < 80; batch_size++) {
    VUnaryMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x40, xnn_init_f32_elu_avx_rr2_lut16_p3_params);
  }
}
4868 
// Runs the kernel with the tester's inplace(true) option over batch sizes
// 1, 40, 79, 118, 157, 196 (step 39).
// NOTE(review): inplace presumably makes input and output share one buffer;
// confirm in vunary-microkernel-tester.h.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X40, inplace) {
  TEST_REQUIRES_X86_AVX;
  for (size_t batch_size = 1; batch_size <= 200; batch_size += 39) {
    VUnaryMicrokernelTester()
      .batch_size(batch_size)
      .inplace(true)
      .Test(xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x40, xnn_init_f32_elu_avx_rr2_lut16_p3_params);
  }
}
4878 
// Runs the kernel with prescale set to 0.1 and then 10, each over batch sizes
// 1, 40, 79, 118, 157, 196 (step 39). No other tester option is overridden.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X40, prescale) {
  TEST_REQUIRES_X86_AVX;
  for (float prescale : std::vector<float>({0.1f, 10.0f})) {
    for (size_t batch_size = 1; batch_size <= 200; batch_size += 39) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .prescale(prescale)
        .Test(xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x40, xnn_init_f32_elu_avx_rr2_lut16_p3_params);
    }
  }
}
4890 
// Runs the kernel with alpha set to 0.3 and then 3, each over batch sizes
// 1, 40, 79, 118, 157, 196 (step 39). No other tester option is overridden.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X40, alpha) {
  TEST_REQUIRES_X86_AVX;
  for (float alpha : std::vector<float>({0.3f, 3.0f})) {
    for (size_t batch_size = 1; batch_size <= 200; batch_size += 39) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .alpha(alpha)
        .Test(xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x40, xnn_init_f32_elu_avx_rr2_lut16_p3_params);
    }
  }
}
4902 
// Runs the kernel with beta set to 0.3 and then 3, each over batch sizes
// 1, 40, 79, 118, 157, 196 (step 39). No other tester option is overridden.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X40, beta) {
  TEST_REQUIRES_X86_AVX;
  for (float beta : std::vector<float>({0.3f, 3.0f})) {
    for (size_t batch_size = 1; batch_size <= 200; batch_size += 39) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .beta(beta)
        .Test(xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x40, xnn_init_f32_elu_avx_rr2_lut16_p3_params);
    }
  }
}
4914 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
4915 
4916 
4917 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the kernel once with a batch size of exactly 48, the number in its
// `_x48` suffix (presumably the per-iteration tile size; confirm in the kernel).
// NOTE(review): TEST_REQUIRES_X86_AVX presumably skips the test on CPUs without
// AVX; confirm in xnnpack/isa-checks.h.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X48, batch_eq_48) {
  TEST_REQUIRES_X86_AVX;
  VUnaryMicrokernelTester()
    .batch_size(48)
    .Test(xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x48, xnn_init_f32_elu_avx_rr2_lut16_p3_params);
}
4924 
// Runs the kernel over batch sizes 96, 144, ..., 432: multiples of 48 from 2x
// up to (not including) 10x, so every size divides evenly by 48.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X48, batch_div_48) {
  TEST_REQUIRES_X86_AVX;
  for (size_t batch_size = 96; batch_size < 480; batch_size += 48) {
    VUnaryMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x48, xnn_init_f32_elu_avx_rr2_lut16_p3_params);
  }
}
4933 
// Runs the kernel over every batch size from 1 through 47, i.e. all sizes
// below 48.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X48, batch_lt_48) {
  TEST_REQUIRES_X86_AVX;
  for (size_t batch_size = 1; batch_size < 48; batch_size++) {
    VUnaryMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x48, xnn_init_f32_elu_avx_rr2_lut16_p3_params);
  }
}
4942 
// Runs the kernel over every batch size from 49 through 95, i.e. strictly
// between 48 and 96.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X48, batch_gt_48) {
  TEST_REQUIRES_X86_AVX;
  for (size_t batch_size = 49; batch_size < 96; batch_size++) {
    VUnaryMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x48, xnn_init_f32_elu_avx_rr2_lut16_p3_params);
  }
}
4951 
// Runs the kernel with the tester's inplace(true) option over batch sizes
// 1, 48, 95, 142, 189, 236 (step 47).
// NOTE(review): inplace presumably makes input and output share one buffer;
// confirm in vunary-microkernel-tester.h.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X48, inplace) {
  TEST_REQUIRES_X86_AVX;
  for (size_t batch_size = 1; batch_size <= 240; batch_size += 47) {
    VUnaryMicrokernelTester()
      .batch_size(batch_size)
      .inplace(true)
      .Test(xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x48, xnn_init_f32_elu_avx_rr2_lut16_p3_params);
  }
}
4961 
// Runs the kernel with prescale set to 0.1 and then 10, each over batch sizes
// 1, 48, 95, 142, 189, 236 (step 47). No other tester option is overridden.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X48, prescale) {
  TEST_REQUIRES_X86_AVX;
  for (float prescale : std::vector<float>({0.1f, 10.0f})) {
    for (size_t batch_size = 1; batch_size <= 240; batch_size += 47) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .prescale(prescale)
        .Test(xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x48, xnn_init_f32_elu_avx_rr2_lut16_p3_params);
    }
  }
}
4973 
// Runs the kernel with alpha set to 0.3 and then 3, each over batch sizes
// 1, 48, 95, 142, 189, 236 (step 47). No other tester option is overridden.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X48, alpha) {
  TEST_REQUIRES_X86_AVX;
  for (float alpha : std::vector<float>({0.3f, 3.0f})) {
    for (size_t batch_size = 1; batch_size <= 240; batch_size += 47) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .alpha(alpha)
        .Test(xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x48, xnn_init_f32_elu_avx_rr2_lut16_p3_params);
    }
  }
}
4985 
// Runs the kernel with beta set to 0.3 and then 3, each over batch sizes
// 1, 48, 95, 142, 189, 236 (step 47). No other tester option is overridden.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X48, beta) {
  TEST_REQUIRES_X86_AVX;
  for (float beta : std::vector<float>({0.3f, 3.0f})) {
    for (size_t batch_size = 1; batch_size <= 240; batch_size += 47) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .beta(beta)
        .Test(xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x48, xnn_init_f32_elu_avx_rr2_lut16_p3_params);
    }
  }
}
4997 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
4998 
4999 
5000 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the kernel once with a batch size of exactly 8, the number in its
// `_x8` suffix (presumably the per-iteration tile size; confirm in the kernel).
// NOTE(review): TEST_REQUIRES_X86_AVX presumably skips the test on CPUs without
// AVX; confirm in xnnpack/isa-checks.h.
TEST(F32_VELU__AVX_RR2_P6_X8, batch_eq_8) {
  TEST_REQUIRES_X86_AVX;
  VUnaryMicrokernelTester()
    .batch_size(8)
    .Test(xnn_f32_velu_ukernel__avx_rr2_p6_x8, xnn_init_f32_elu_avx_rr2_p6_params);
}
5007 
// Runs the kernel over batch sizes 16, 24, ..., 72: multiples of 8 from 2x
// up to (not including) 10x, so every size divides evenly by 8.
TEST(F32_VELU__AVX_RR2_P6_X8, batch_div_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t batch_size = 16; batch_size < 80; batch_size += 8) {
    VUnaryMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_f32_velu_ukernel__avx_rr2_p6_x8, xnn_init_f32_elu_avx_rr2_p6_params);
  }
}
5016 
// Runs the kernel over every batch size from 1 through 7, i.e. all sizes
// below 8.
TEST(F32_VELU__AVX_RR2_P6_X8, batch_lt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t batch_size = 1; batch_size < 8; batch_size++) {
    VUnaryMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_f32_velu_ukernel__avx_rr2_p6_x8, xnn_init_f32_elu_avx_rr2_p6_params);
  }
}
5025 
// Runs the kernel over every batch size from 9 through 15, i.e. strictly
// between 8 and 16.
TEST(F32_VELU__AVX_RR2_P6_X8, batch_gt_8) {
  TEST_REQUIRES_X86_AVX;
  for (size_t batch_size = 9; batch_size < 16; batch_size++) {
    VUnaryMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_f32_velu_ukernel__avx_rr2_p6_x8, xnn_init_f32_elu_avx_rr2_p6_params);
  }
}
5034 
// Runs the kernel with the tester's inplace(true) option over batch sizes
// 1, 8, 15, 22, 29, 36 (step 7).
// NOTE(review): inplace presumably makes input and output share one buffer;
// confirm in vunary-microkernel-tester.h.
TEST(F32_VELU__AVX_RR2_P6_X8, inplace) {
  TEST_REQUIRES_X86_AVX;
  for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
    VUnaryMicrokernelTester()
      .batch_size(batch_size)
      .inplace(true)
      .Test(xnn_f32_velu_ukernel__avx_rr2_p6_x8, xnn_init_f32_elu_avx_rr2_p6_params);
  }
}
5044 
// Runs the kernel with prescale set to 0.1 and then 10, each over batch sizes
// 1, 8, 15, 22, 29, 36 (step 7). No other tester option is overridden.
TEST(F32_VELU__AVX_RR2_P6_X8, prescale) {
  TEST_REQUIRES_X86_AVX;
  for (float prescale : std::vector<float>({0.1f, 10.0f})) {
    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .prescale(prescale)
        .Test(xnn_f32_velu_ukernel__avx_rr2_p6_x8, xnn_init_f32_elu_avx_rr2_p6_params);
    }
  }
}
5056 
// Runs the kernel with alpha set to 0.3 and then 3, each over batch sizes
// 1, 8, 15, 22, 29, 36 (step 7). No other tester option is overridden.
TEST(F32_VELU__AVX_RR2_P6_X8, alpha) {
  TEST_REQUIRES_X86_AVX;
  for (float alpha : std::vector<float>({0.3f, 3.0f})) {
    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .alpha(alpha)
        .Test(xnn_f32_velu_ukernel__avx_rr2_p6_x8, xnn_init_f32_elu_avx_rr2_p6_params);
    }
  }
}
5068 
// Runs the kernel with beta set to 0.3 and then 3, each over batch sizes
// 1, 8, 15, 22, 29, 36 (step 7). No other tester option is overridden.
TEST(F32_VELU__AVX_RR2_P6_X8, beta) {
  TEST_REQUIRES_X86_AVX;
  for (float beta : std::vector<float>({0.3f, 3.0f})) {
    for (size_t batch_size = 1; batch_size <= 40; batch_size += 7) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .beta(beta)
        .Test(xnn_f32_velu_ukernel__avx_rr2_p6_x8, xnn_init_f32_elu_avx_rr2_p6_params);
    }
  }
}
5080 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
5081 
5082 
5083 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the kernel once with a batch size of exactly 16, the number in its
// `_x16` suffix (presumably the per-iteration tile size; confirm in the kernel).
// NOTE(review): TEST_REQUIRES_X86_AVX presumably skips the test on CPUs without
// AVX; confirm in xnnpack/isa-checks.h.
TEST(F32_VELU__AVX_RR2_P6_X16, batch_eq_16) {
  TEST_REQUIRES_X86_AVX;
  VUnaryMicrokernelTester()
    .batch_size(16)
    .Test(xnn_f32_velu_ukernel__avx_rr2_p6_x16, xnn_init_f32_elu_avx_rr2_p6_params);
}
5090 
// Runs the kernel over batch sizes 32, 48, ..., 144: multiples of 16 from 2x
// up to (not including) 10x, so every size divides evenly by 16.
TEST(F32_VELU__AVX_RR2_P6_X16, batch_div_16) {
  TEST_REQUIRES_X86_AVX;
  for (size_t batch_size = 32; batch_size < 160; batch_size += 16) {
    VUnaryMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_f32_velu_ukernel__avx_rr2_p6_x16, xnn_init_f32_elu_avx_rr2_p6_params);
  }
}
5099 
// Runs the kernel over every batch size from 1 through 15, i.e. all sizes
// below 16.
TEST(F32_VELU__AVX_RR2_P6_X16, batch_lt_16) {
  TEST_REQUIRES_X86_AVX;
  for (size_t batch_size = 1; batch_size < 16; batch_size++) {
    VUnaryMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_f32_velu_ukernel__avx_rr2_p6_x16, xnn_init_f32_elu_avx_rr2_p6_params);
  }
}
5108 
// Every batch size from 17 through 31.
TEST(F32_VELU__AVX_RR2_P6_X16, batch_gt_16) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 17; n < 32; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_velu_ukernel__avx_rr2_p6_x16,
      xnn_init_f32_elu_avx_rr2_p6_params);
  }
}
5117 
// In-place option enabled; batch sizes 1, 16, 31, 46, 61, 76.
TEST(F32_VELU__AVX_RR2_P6_X16, inplace) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 80; n += 15) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
      xnn_f32_velu_ukernel__avx_rr2_p6_x16,
      xnn_init_f32_elu_avx_rr2_p6_params);
  }
}
5127 
// Prescale values 0.1 and 10.0, each run over batch sizes 1, 16, 31, 46, 61, 76.
TEST(F32_VELU__AVX_RR2_P6_X16, prescale) {
  TEST_REQUIRES_X86_AVX;
  const float prescale_values[] = {0.1f, 10.0f};
  for (const float prescale_value : prescale_values) {
    for (size_t n = 1; n <= 80; n += 15) {
      VUnaryMicrokernelTester().batch_size(n).prescale(prescale_value).Test(
        xnn_f32_velu_ukernel__avx_rr2_p6_x16,
        xnn_init_f32_elu_avx_rr2_p6_params);
    }
  }
}
5139 
// Alpha values 0.3 and 3.0, each run over batch sizes 1, 16, 31, 46, 61, 76.
TEST(F32_VELU__AVX_RR2_P6_X16, alpha) {
  TEST_REQUIRES_X86_AVX;
  const float alpha_values[] = {0.3f, 3.0f};
  for (const float alpha_value : alpha_values) {
    for (size_t n = 1; n <= 80; n += 15) {
      VUnaryMicrokernelTester().batch_size(n).alpha(alpha_value).Test(
        xnn_f32_velu_ukernel__avx_rr2_p6_x16,
        xnn_init_f32_elu_avx_rr2_p6_params);
    }
  }
}
5151 
// Beta values 0.3 and 3.0, each run over batch sizes 1, 16, 31, 46, 61, 76.
TEST(F32_VELU__AVX_RR2_P6_X16, beta) {
  TEST_REQUIRES_X86_AVX;
  const float beta_values[] = {0.3f, 3.0f};
  for (const float beta_value : beta_values) {
    for (size_t n = 1; n <= 80; n += 15) {
      VUnaryMicrokernelTester().batch_size(n).beta(beta_value).Test(
        xnn_f32_velu_ukernel__avx_rr2_p6_x16,
        xnn_init_f32_elu_avx_rr2_p6_params);
    }
  }
}
5163 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
5164 
5165 
5166 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with a batch of exactly 24; no other tester options are set.
TEST(F32_VELU__AVX_RR2_P6_X24, batch_eq_24) {
  TEST_REQUIRES_X86_AVX;
  VUnaryMicrokernelTester().batch_size(24).Test(
    xnn_f32_velu_ukernel__avx_rr2_p6_x24,
    xnn_init_f32_elu_avx_rr2_p6_params);
}
5173 
// Batch sizes 48, 72, ..., 216: multiples of 24 starting at 48.
TEST(F32_VELU__AVX_RR2_P6_X24, batch_div_24) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 48; n < 240; n += 24) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_velu_ukernel__avx_rr2_p6_x24,
      xnn_init_f32_elu_avx_rr2_p6_params);
  }
}
5182 
// Every batch size from 1 through 23.
TEST(F32_VELU__AVX_RR2_P6_X24, batch_lt_24) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n < 24; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_velu_ukernel__avx_rr2_p6_x24,
      xnn_init_f32_elu_avx_rr2_p6_params);
  }
}
5191 
// Every batch size from 25 through 47.
TEST(F32_VELU__AVX_RR2_P6_X24, batch_gt_24) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 25; n < 48; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_velu_ukernel__avx_rr2_p6_x24,
      xnn_init_f32_elu_avx_rr2_p6_params);
  }
}
5200 
// In-place option enabled; batch sizes 1, 24, 47, 70, 93, 116.
TEST(F32_VELU__AVX_RR2_P6_X24, inplace) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 120; n += 23) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
      xnn_f32_velu_ukernel__avx_rr2_p6_x24,
      xnn_init_f32_elu_avx_rr2_p6_params);
  }
}
5210 
// Prescale values 0.1 and 10.0, each run over batch sizes 1, 24, 47, 70, 93, 116.
TEST(F32_VELU__AVX_RR2_P6_X24, prescale) {
  TEST_REQUIRES_X86_AVX;
  const float prescale_values[] = {0.1f, 10.0f};
  for (const float prescale_value : prescale_values) {
    for (size_t n = 1; n <= 120; n += 23) {
      VUnaryMicrokernelTester().batch_size(n).prescale(prescale_value).Test(
        xnn_f32_velu_ukernel__avx_rr2_p6_x24,
        xnn_init_f32_elu_avx_rr2_p6_params);
    }
  }
}
5222 
// Alpha values 0.3 and 3.0, each run over batch sizes 1, 24, 47, 70, 93, 116.
TEST(F32_VELU__AVX_RR2_P6_X24, alpha) {
  TEST_REQUIRES_X86_AVX;
  const float alpha_values[] = {0.3f, 3.0f};
  for (const float alpha_value : alpha_values) {
    for (size_t n = 1; n <= 120; n += 23) {
      VUnaryMicrokernelTester().batch_size(n).alpha(alpha_value).Test(
        xnn_f32_velu_ukernel__avx_rr2_p6_x24,
        xnn_init_f32_elu_avx_rr2_p6_params);
    }
  }
}
5234 
// Beta values 0.3 and 3.0, each run over batch sizes 1, 24, 47, 70, 93, 116.
TEST(F32_VELU__AVX_RR2_P6_X24, beta) {
  TEST_REQUIRES_X86_AVX;
  const float beta_values[] = {0.3f, 3.0f};
  for (const float beta_value : beta_values) {
    for (size_t n = 1; n <= 120; n += 23) {
      VUnaryMicrokernelTester().batch_size(n).beta(beta_value).Test(
        xnn_f32_velu_ukernel__avx_rr2_p6_x24,
        xnn_init_f32_elu_avx_rr2_p6_params);
    }
  }
}
5246 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
5247 
5248 
5249 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with a batch of exactly 32; no other tester options are set.
TEST(F32_VELU__AVX_RR2_P6_X32, batch_eq_32) {
  TEST_REQUIRES_X86_AVX;
  VUnaryMicrokernelTester().batch_size(32).Test(
    xnn_f32_velu_ukernel__avx_rr2_p6_x32,
    xnn_init_f32_elu_avx_rr2_p6_params);
}
5256 
// Batch sizes 64, 96, ..., 288: multiples of 32 starting at 64.
TEST(F32_VELU__AVX_RR2_P6_X32, batch_div_32) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 64; n < 320; n += 32) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_velu_ukernel__avx_rr2_p6_x32,
      xnn_init_f32_elu_avx_rr2_p6_params);
  }
}
5265 
// Every batch size from 1 through 31.
TEST(F32_VELU__AVX_RR2_P6_X32, batch_lt_32) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n < 32; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_velu_ukernel__avx_rr2_p6_x32,
      xnn_init_f32_elu_avx_rr2_p6_params);
  }
}
5274 
// Every batch size from 33 through 63.
TEST(F32_VELU__AVX_RR2_P6_X32, batch_gt_32) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 33; n < 64; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_velu_ukernel__avx_rr2_p6_x32,
      xnn_init_f32_elu_avx_rr2_p6_params);
  }
}
5283 
// In-place option enabled; batch sizes 1, 32, 63, 94, 125, 156.
TEST(F32_VELU__AVX_RR2_P6_X32, inplace) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 160; n += 31) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
      xnn_f32_velu_ukernel__avx_rr2_p6_x32,
      xnn_init_f32_elu_avx_rr2_p6_params);
  }
}
5293 
// Prescale values 0.1 and 10.0, each run over batch sizes 1, 32, 63, 94, 125, 156.
TEST(F32_VELU__AVX_RR2_P6_X32, prescale) {
  TEST_REQUIRES_X86_AVX;
  const float prescale_values[] = {0.1f, 10.0f};
  for (const float prescale_value : prescale_values) {
    for (size_t n = 1; n <= 160; n += 31) {
      VUnaryMicrokernelTester().batch_size(n).prescale(prescale_value).Test(
        xnn_f32_velu_ukernel__avx_rr2_p6_x32,
        xnn_init_f32_elu_avx_rr2_p6_params);
    }
  }
}
5305 
// Alpha values 0.3 and 3.0, each run over batch sizes 1, 32, 63, 94, 125, 156.
TEST(F32_VELU__AVX_RR2_P6_X32, alpha) {
  TEST_REQUIRES_X86_AVX;
  const float alpha_values[] = {0.3f, 3.0f};
  for (const float alpha_value : alpha_values) {
    for (size_t n = 1; n <= 160; n += 31) {
      VUnaryMicrokernelTester().batch_size(n).alpha(alpha_value).Test(
        xnn_f32_velu_ukernel__avx_rr2_p6_x32,
        xnn_init_f32_elu_avx_rr2_p6_params);
    }
  }
}
5317 
// Beta values 0.3 and 3.0, each run over batch sizes 1, 32, 63, 94, 125, 156.
TEST(F32_VELU__AVX_RR2_P6_X32, beta) {
  TEST_REQUIRES_X86_AVX;
  const float beta_values[] = {0.3f, 3.0f};
  for (const float beta_value : beta_values) {
    for (size_t n = 1; n <= 160; n += 31) {
      VUnaryMicrokernelTester().batch_size(n).beta(beta_value).Test(
        xnn_f32_velu_ukernel__avx_rr2_p6_x32,
        xnn_init_f32_elu_avx_rr2_p6_params);
    }
  }
}
5329 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
5330 
5331 
5332 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with a batch of exactly 40; no other tester options are set.
TEST(F32_VELU__AVX_RR2_P6_X40, batch_eq_40) {
  TEST_REQUIRES_X86_AVX;
  VUnaryMicrokernelTester().batch_size(40).Test(
    xnn_f32_velu_ukernel__avx_rr2_p6_x40,
    xnn_init_f32_elu_avx_rr2_p6_params);
}
5339 
// Batch sizes 80, 120, ..., 360: multiples of 40 starting at 80.
TEST(F32_VELU__AVX_RR2_P6_X40, batch_div_40) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 80; n < 400; n += 40) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_velu_ukernel__avx_rr2_p6_x40,
      xnn_init_f32_elu_avx_rr2_p6_params);
  }
}
5348 
// Every batch size from 1 through 39.
TEST(F32_VELU__AVX_RR2_P6_X40, batch_lt_40) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n < 40; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_velu_ukernel__avx_rr2_p6_x40,
      xnn_init_f32_elu_avx_rr2_p6_params);
  }
}
5357 
// Every batch size from 41 through 79.
TEST(F32_VELU__AVX_RR2_P6_X40, batch_gt_40) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 41; n < 80; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_velu_ukernel__avx_rr2_p6_x40,
      xnn_init_f32_elu_avx_rr2_p6_params);
  }
}
5366 
// In-place option enabled; batch sizes 1, 40, 79, 118, 157, 196.
TEST(F32_VELU__AVX_RR2_P6_X40, inplace) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 200; n += 39) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
      xnn_f32_velu_ukernel__avx_rr2_p6_x40,
      xnn_init_f32_elu_avx_rr2_p6_params);
  }
}
5376 
// Prescale values 0.1 and 10.0, each run over batch sizes 1, 40, 79, 118, 157, 196.
TEST(F32_VELU__AVX_RR2_P6_X40, prescale) {
  TEST_REQUIRES_X86_AVX;
  const float prescale_values[] = {0.1f, 10.0f};
  for (const float prescale_value : prescale_values) {
    for (size_t n = 1; n <= 200; n += 39) {
      VUnaryMicrokernelTester().batch_size(n).prescale(prescale_value).Test(
        xnn_f32_velu_ukernel__avx_rr2_p6_x40,
        xnn_init_f32_elu_avx_rr2_p6_params);
    }
  }
}
5388 
// Alpha values 0.3 and 3.0, each run over batch sizes 1, 40, 79, 118, 157, 196.
TEST(F32_VELU__AVX_RR2_P6_X40, alpha) {
  TEST_REQUIRES_X86_AVX;
  const float alpha_values[] = {0.3f, 3.0f};
  for (const float alpha_value : alpha_values) {
    for (size_t n = 1; n <= 200; n += 39) {
      VUnaryMicrokernelTester().batch_size(n).alpha(alpha_value).Test(
        xnn_f32_velu_ukernel__avx_rr2_p6_x40,
        xnn_init_f32_elu_avx_rr2_p6_params);
    }
  }
}
5400 
// Beta values 0.3 and 3.0, each run over batch sizes 1, 40, 79, 118, 157, 196.
TEST(F32_VELU__AVX_RR2_P6_X40, beta) {
  TEST_REQUIRES_X86_AVX;
  const float beta_values[] = {0.3f, 3.0f};
  for (const float beta_value : beta_values) {
    for (size_t n = 1; n <= 200; n += 39) {
      VUnaryMicrokernelTester().batch_size(n).beta(beta_value).Test(
        xnn_f32_velu_ukernel__avx_rr2_p6_x40,
        xnn_init_f32_elu_avx_rr2_p6_params);
    }
  }
}
5412 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
5413 
5414 
5415 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with a batch of exactly 48; no other tester options are set.
TEST(F32_VELU__AVX_RR2_P6_X48, batch_eq_48) {
  TEST_REQUIRES_X86_AVX;
  VUnaryMicrokernelTester().batch_size(48).Test(
    xnn_f32_velu_ukernel__avx_rr2_p6_x48,
    xnn_init_f32_elu_avx_rr2_p6_params);
}
5422 
// Batch sizes 96, 144, ..., 432: multiples of 48 starting at 96.
TEST(F32_VELU__AVX_RR2_P6_X48, batch_div_48) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 96; n < 480; n += 48) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_velu_ukernel__avx_rr2_p6_x48,
      xnn_init_f32_elu_avx_rr2_p6_params);
  }
}
5431 
// Every batch size from 1 through 47.
TEST(F32_VELU__AVX_RR2_P6_X48, batch_lt_48) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n < 48; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_velu_ukernel__avx_rr2_p6_x48,
      xnn_init_f32_elu_avx_rr2_p6_params);
  }
}
5440 
// Every batch size from 49 through 95.
TEST(F32_VELU__AVX_RR2_P6_X48, batch_gt_48) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 49; n < 96; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_velu_ukernel__avx_rr2_p6_x48,
      xnn_init_f32_elu_avx_rr2_p6_params);
  }
}
5449 
// In-place option enabled; batch sizes 1, 48, 95, 142, 189, 236.
TEST(F32_VELU__AVX_RR2_P6_X48, inplace) {
  TEST_REQUIRES_X86_AVX;
  for (size_t n = 1; n <= 240; n += 47) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
      xnn_f32_velu_ukernel__avx_rr2_p6_x48,
      xnn_init_f32_elu_avx_rr2_p6_params);
  }
}
5459 
// Prescale values 0.1 and 10.0, each run over batch sizes 1, 48, 95, 142, 189, 236.
TEST(F32_VELU__AVX_RR2_P6_X48, prescale) {
  TEST_REQUIRES_X86_AVX;
  const float prescale_values[] = {0.1f, 10.0f};
  for (const float prescale_value : prescale_values) {
    for (size_t n = 1; n <= 240; n += 47) {
      VUnaryMicrokernelTester().batch_size(n).prescale(prescale_value).Test(
        xnn_f32_velu_ukernel__avx_rr2_p6_x48,
        xnn_init_f32_elu_avx_rr2_p6_params);
    }
  }
}
5471 
// Alpha values 0.3 and 3.0, each run over batch sizes 1, 48, 95, 142, 189, 236.
TEST(F32_VELU__AVX_RR2_P6_X48, alpha) {
  TEST_REQUIRES_X86_AVX;
  const float alpha_values[] = {0.3f, 3.0f};
  for (const float alpha_value : alpha_values) {
    for (size_t n = 1; n <= 240; n += 47) {
      VUnaryMicrokernelTester().batch_size(n).alpha(alpha_value).Test(
        xnn_f32_velu_ukernel__avx_rr2_p6_x48,
        xnn_init_f32_elu_avx_rr2_p6_params);
    }
  }
}
5483 
// Beta values 0.3 and 3.0, each run over batch sizes 1, 48, 95, 142, 189, 236.
TEST(F32_VELU__AVX_RR2_P6_X48, beta) {
  TEST_REQUIRES_X86_AVX;
  const float beta_values[] = {0.3f, 3.0f};
  for (const float beta_value : beta_values) {
    for (size_t n = 1; n <= 240; n += 47) {
      VUnaryMicrokernelTester().batch_size(n).beta(beta_value).Test(
        xnn_f32_velu_ukernel__avx_rr2_p6_x48,
        xnn_init_f32_elu_avx_rr2_p6_params);
    }
  }
}
5495 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
5496 
5497 
5498 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with a batch of exactly 8; no other tester options are set.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X8, batch_eq_8) {
  TEST_REQUIRES_X86_AVX2;
  VUnaryMicrokernelTester().batch_size(8).Test(
    xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x8,
    xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
}
5505 
// Batch sizes 16, 24, ..., 72: multiples of 8 starting at 16.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X8, batch_div_8) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 16; n < 80; n += 8) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x8,
      xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
  }
}
5514 
// Every batch size from 1 through 7.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X8, batch_lt_8) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n < 8; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x8,
      xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
  }
}
5523 
// Every batch size from 9 through 15.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X8, batch_gt_8) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 9; n < 16; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x8,
      xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
  }
}
5532 
// In-place option enabled; batch sizes 1, 8, 15, 22, 29, 36.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X8, inplace) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 40; n += 7) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
      xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x8,
      xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
  }
}
5542 
// Prescale values 0.1 and 10.0, each run over batch sizes 1, 8, 15, 22, 29, 36.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X8, prescale) {
  TEST_REQUIRES_X86_AVX2;
  const float prescale_values[] = {0.1f, 10.0f};
  for (const float prescale_value : prescale_values) {
    for (size_t n = 1; n <= 40; n += 7) {
      VUnaryMicrokernelTester().batch_size(n).prescale(prescale_value).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x8,
        xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
    }
  }
}
5554 
// Alpha values 0.3 and 3.0, each run over batch sizes 1, 8, 15, 22, 29, 36.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X8, alpha) {
  TEST_REQUIRES_X86_AVX2;
  const float alpha_values[] = {0.3f, 3.0f};
  for (const float alpha_value : alpha_values) {
    for (size_t n = 1; n <= 40; n += 7) {
      VUnaryMicrokernelTester().batch_size(n).alpha(alpha_value).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x8,
        xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
    }
  }
}
5566 
// Beta values 0.3 and 3.0, each run over batch sizes 1, 8, 15, 22, 29, 36.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X8, beta) {
  TEST_REQUIRES_X86_AVX2;
  const float beta_values[] = {0.3f, 3.0f};
  for (const float beta_value : beta_values) {
    for (size_t n = 1; n <= 40; n += 7) {
      VUnaryMicrokernelTester().batch_size(n).beta(beta_value).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x8,
        xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
    }
  }
}
5578 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
5579 
5580 
5581 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with a batch of exactly 16; no other tester options are set.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X16, batch_eq_16) {
  TEST_REQUIRES_X86_AVX2;
  VUnaryMicrokernelTester().batch_size(16).Test(
    xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x16,
    xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
}
5588 
// Batch sizes 32, 48, ..., 144: multiples of 16 starting at 32.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X16, batch_div_16) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 32; n < 160; n += 16) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x16,
      xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
  }
}
5597 
// Every batch size from 1 through 15.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X16, batch_lt_16) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n < 16; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x16,
      xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
  }
}
5606 
// Every batch size from 17 through 31.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X16, batch_gt_16) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 17; n < 32; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x16,
      xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
  }
}
5615 
// In-place option enabled; batch sizes 1, 16, 31, 46, 61, 76.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X16, inplace) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 80; n += 15) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
      xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x16,
      xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
  }
}
5625 
// Prescale values 0.1 and 10.0, each run over batch sizes 1, 16, 31, 46, 61, 76.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X16, prescale) {
  TEST_REQUIRES_X86_AVX2;
  const float prescale_values[] = {0.1f, 10.0f};
  for (const float prescale_value : prescale_values) {
    for (size_t n = 1; n <= 80; n += 15) {
      VUnaryMicrokernelTester().batch_size(n).prescale(prescale_value).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x16,
        xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
    }
  }
}
5637 
// Alpha values 0.3 and 3.0, each run over batch sizes 1, 16, 31, 46, 61, 76.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X16, alpha) {
  TEST_REQUIRES_X86_AVX2;
  const float alpha_values[] = {0.3f, 3.0f};
  for (const float alpha_value : alpha_values) {
    for (size_t n = 1; n <= 80; n += 15) {
      VUnaryMicrokernelTester().batch_size(n).alpha(alpha_value).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x16,
        xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
    }
  }
}
5649 
// Beta values 0.3 and 3.0, each run over batch sizes 1, 16, 31, 46, 61, 76.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X16, beta) {
  TEST_REQUIRES_X86_AVX2;
  const float beta_values[] = {0.3f, 3.0f};
  for (const float beta_value : beta_values) {
    for (size_t n = 1; n <= 80; n += 15) {
      VUnaryMicrokernelTester().batch_size(n).beta(beta_value).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x16,
        xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
    }
  }
}
5661 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
5662 
5663 
5664 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with a batch of exactly 24; no other tester options are set.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X24, batch_eq_24) {
  TEST_REQUIRES_X86_AVX2;
  VUnaryMicrokernelTester().batch_size(24).Test(
    xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x24,
    xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
}
5671 
// Batch sizes 48, 72, ..., 216: multiples of 24 starting at 48.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X24, batch_div_24) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 48; n < 240; n += 24) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x24,
      xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
  }
}
5680 
// Every batch size from 1 through 23.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X24, batch_lt_24) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n < 24; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x24,
      xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
  }
}
5689 
// Every batch size from 25 through 47.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X24, batch_gt_24) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 25; n < 48; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x24,
      xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
  }
}
5698 
// In-place option enabled; batch sizes 1, 24, 47, 70, 93, 116.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X24, inplace) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 120; n += 23) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
      xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x24,
      xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
  }
}
5708 
// Prescale values 0.1 and 10.0, each run over batch sizes 1, 24, 47, 70, 93, 116.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X24, prescale) {
  TEST_REQUIRES_X86_AVX2;
  const float prescale_values[] = {0.1f, 10.0f};
  for (const float prescale_value : prescale_values) {
    for (size_t n = 1; n <= 120; n += 23) {
      VUnaryMicrokernelTester().batch_size(n).prescale(prescale_value).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x24,
        xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
    }
  }
}
5720 
// Alpha values 0.3 and 3.0, each run over batch sizes 1, 24, 47, 70, 93, 116.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X24, alpha) {
  TEST_REQUIRES_X86_AVX2;
  const float alpha_values[] = {0.3f, 3.0f};
  for (const float alpha_value : alpha_values) {
    for (size_t n = 1; n <= 120; n += 23) {
      VUnaryMicrokernelTester().batch_size(n).alpha(alpha_value).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x24,
        xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
    }
  }
}
5732 
// Beta values 0.3 and 3.0, each run over batch sizes 1, 24, 47, 70, 93, 116.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X24, beta) {
  TEST_REQUIRES_X86_AVX2;
  const float beta_values[] = {0.3f, 3.0f};
  for (const float beta_value : beta_values) {
    for (size_t n = 1; n <= 120; n += 23) {
      VUnaryMicrokernelTester().batch_size(n).beta(beta_value).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x24,
        xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
    }
  }
}
5744 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
5745 
5746 
5747 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with a batch of exactly 32; no other tester options are set.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X32, batch_eq_32) {
  TEST_REQUIRES_X86_AVX2;
  VUnaryMicrokernelTester().batch_size(32).Test(
    xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x32,
    xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
}
5754 
// Batch sizes 64, 96, ..., 288: multiples of 32 starting at 64.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X32, batch_div_32) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 64; n < 320; n += 32) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x32,
      xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
  }
}
5763 
// Every batch size from 1 through 31.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X32, batch_lt_32) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n < 32; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x32,
      xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
  }
}
5772 
// Every batch size from 33 through 63.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X32, batch_gt_32) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 33; n < 64; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x32,
      xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
  }
}
5781 
// In-place option enabled; batch sizes 1, 32, 63, 94, 125, 156.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X32, inplace) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 160; n += 31) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
      xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x32,
      xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
  }
}
5791 
// Prescale values 0.1 and 10.0, each run over batch sizes 1, 32, 63, 94, 125, 156.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X32, prescale) {
  TEST_REQUIRES_X86_AVX2;
  const float prescale_values[] = {0.1f, 10.0f};
  for (const float prescale_value : prescale_values) {
    for (size_t n = 1; n <= 160; n += 31) {
      VUnaryMicrokernelTester().batch_size(n).prescale(prescale_value).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x32,
        xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
    }
  }
}
5803 
// Alpha values 0.3 and 3.0, each run over batch sizes 1, 32, 63, 94, 125, 156.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X32, alpha) {
  TEST_REQUIRES_X86_AVX2;
  const float alpha_values[] = {0.3f, 3.0f};
  for (const float alpha_value : alpha_values) {
    for (size_t n = 1; n <= 160; n += 31) {
      VUnaryMicrokernelTester().batch_size(n).alpha(alpha_value).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x32,
        xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
    }
  }
}
5815 
// Beta values 0.3 and 3.0, each run over batch sizes 1, 32, 63, 94, 125, 156.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X32, beta) {
  TEST_REQUIRES_X86_AVX2;
  const float beta_values[] = {0.3f, 3.0f};
  for (const float beta_value : beta_values) {
    for (size_t n = 1; n <= 160; n += 31) {
      VUnaryMicrokernelTester().batch_size(n).beta(beta_value).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x32,
        xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
    }
  }
}
5827 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
5828 
5829 
5830 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run with a batch of exactly 40; no other tester options are set.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X40, batch_eq_40) {
  TEST_REQUIRES_X86_AVX2;
  VUnaryMicrokernelTester().batch_size(40).Test(
    xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x40,
    xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
}
5837 
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X40, batch_div_40) {
  TEST_REQUIRES_X86_AVX2;
  // Batch sizes 80, 120, ..., 360: multiples 2 through 9 of 40.
  for (size_t multiple = 2; multiple < 10; multiple++) {
    VUnaryMicrokernelTester().batch_size(multiple * 40).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x40,
        xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
  }
}
5846 
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X40, batch_lt_40) {
  TEST_REQUIRES_X86_AVX2;
  // Every batch size from 1 through 39.
  for (size_t n = 1; n <= 39; n++) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x40,
        xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
  }
}
5855 
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X40, batch_gt_40) {
  TEST_REQUIRES_X86_AVX2;
  // Every batch size from 41 through 79, expressed as 40 plus an offset.
  for (size_t extra = 1; extra < 40; extra++) {
    VUnaryMicrokernelTester().batch_size(40 + extra).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x40,
        xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
  }
}
5864 
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X40, inplace) {
  TEST_REQUIRES_X86_AVX2;
  // inplace(true) at batch sizes 1, 40, 79, ... (stride 39, up to 200).
  for (size_t n = 1; n <= 200; n += 39) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x40,
        xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
  }
}
5874 
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X40, prescale) {
  TEST_REQUIRES_X86_AVX2;
  // Two prescale settings, each at batch sizes 1, 40, 79, ... (stride 39, up to 200).
  const std::vector<float> prescale_values{0.1f, 10.0f};
  for (const float value : prescale_values) {
    for (size_t n = 1; n <= 200; n += 39) {
      VUnaryMicrokernelTester().batch_size(n).prescale(value).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x40,
          xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
    }
  }
}
5886 
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X40, alpha) {
  TEST_REQUIRES_X86_AVX2;
  // Two alpha settings, each at batch sizes 1, 40, 79, ... (stride 39, up to 200).
  const std::vector<float> alpha_values{0.3f, 3.0f};
  for (const float value : alpha_values) {
    for (size_t n = 1; n <= 200; n += 39) {
      VUnaryMicrokernelTester().batch_size(n).alpha(value).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x40,
          xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
    }
  }
}
5898 
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X40, beta) {
  TEST_REQUIRES_X86_AVX2;
  // Two beta settings, each at batch sizes 1, 40, 79, ... (stride 39, up to 200).
  const std::vector<float> beta_values{0.3f, 3.0f};
  for (const float value : beta_values) {
    for (size_t n = 1; n <= 200; n += 39) {
      VUnaryMicrokernelTester().batch_size(n).beta(value).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x40,
          xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
    }
  }
}
5910 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
5911 
5912 
5913 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X48, batch_eq_48) {
  TEST_REQUIRES_X86_AVX2;
  // Single run on a batch of exactly 48 elements.
  VUnaryMicrokernelTester().batch_size(48).Test(
      xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x48,
      xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
}
5920 
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X48, batch_div_48) {
  TEST_REQUIRES_X86_AVX2;
  // Batch sizes 96, 144, ..., 432: multiples 2 through 9 of 48.
  for (size_t multiple = 2; multiple < 10; multiple++) {
    VUnaryMicrokernelTester().batch_size(multiple * 48).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x48,
        xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
  }
}
5929 
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X48, batch_lt_48) {
  TEST_REQUIRES_X86_AVX2;
  // Every batch size from 1 through 47.
  for (size_t n = 1; n <= 47; n++) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x48,
        xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
  }
}
5938 
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X48, batch_gt_48) {
  TEST_REQUIRES_X86_AVX2;
  // Every batch size from 49 through 95, expressed as 48 plus an offset.
  for (size_t extra = 1; extra < 48; extra++) {
    VUnaryMicrokernelTester().batch_size(48 + extra).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x48,
        xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
  }
}
5947 
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X48, inplace) {
  TEST_REQUIRES_X86_AVX2;
  // inplace(true) at batch sizes 1, 48, 95, ... (stride 47, up to 240).
  for (size_t n = 1; n <= 240; n += 47) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x48,
        xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
  }
}
5957 
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X48, prescale) {
  TEST_REQUIRES_X86_AVX2;
  // Two prescale settings, each at batch sizes 1, 48, 95, ... (stride 47, up to 240).
  const std::vector<float> prescale_values{0.1f, 10.0f};
  for (const float value : prescale_values) {
    for (size_t n = 1; n <= 240; n += 47) {
      VUnaryMicrokernelTester().batch_size(n).prescale(value).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x48,
          xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
    }
  }
}
5969 
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X48, alpha) {
  TEST_REQUIRES_X86_AVX2;
  // Two alpha settings, each at batch sizes 1, 48, 95, ... (stride 47, up to 240).
  const std::vector<float> alpha_values{0.3f, 3.0f};
  for (const float value : alpha_values) {
    for (size_t n = 1; n <= 240; n += 47) {
      VUnaryMicrokernelTester().batch_size(n).alpha(value).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x48,
          xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
    }
  }
}
5981 
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X48, beta) {
  TEST_REQUIRES_X86_AVX2;
  // Two beta settings, each at batch sizes 1, 48, 95, ... (stride 47, up to 240).
  const std::vector<float> beta_values{0.3f, 3.0f};
  for (const float value : beta_values) {
    for (size_t n = 1; n <= 240; n += 47) {
      VUnaryMicrokernelTester().batch_size(n).beta(value).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x48,
          xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
    }
  }
}
5993 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
5994 
5995 
5996 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X56, batch_eq_56) {
  TEST_REQUIRES_X86_AVX2;
  // Single run on a batch of exactly 56 elements.
  VUnaryMicrokernelTester().batch_size(56).Test(
      xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x56,
      xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
}
6003 
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X56, batch_div_56) {
  TEST_REQUIRES_X86_AVX2;
  // Batch sizes 112, 168, ..., 504: multiples 2 through 9 of 56.
  for (size_t multiple = 2; multiple < 10; multiple++) {
    VUnaryMicrokernelTester().batch_size(multiple * 56).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x56,
        xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
  }
}
6012 
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X56, batch_lt_56) {
  TEST_REQUIRES_X86_AVX2;
  // Every batch size from 1 through 55.
  for (size_t n = 1; n <= 55; n++) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x56,
        xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
  }
}
6021 
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X56, batch_gt_56) {
  TEST_REQUIRES_X86_AVX2;
  // Every batch size from 57 through 111, expressed as 56 plus an offset.
  for (size_t extra = 1; extra < 56; extra++) {
    VUnaryMicrokernelTester().batch_size(56 + extra).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x56,
        xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
  }
}
6030 
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X56, inplace) {
  TEST_REQUIRES_X86_AVX2;
  // inplace(true) at batch sizes 1, 56, 111, ... (stride 55, up to 280).
  for (size_t n = 1; n <= 280; n += 55) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x56,
        xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
  }
}
6040 
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X56, prescale) {
  TEST_REQUIRES_X86_AVX2;
  // Two prescale settings, each at batch sizes 1, 56, 111, ... (stride 55, up to 280).
  const std::vector<float> prescale_values{0.1f, 10.0f};
  for (const float value : prescale_values) {
    for (size_t n = 1; n <= 280; n += 55) {
      VUnaryMicrokernelTester().batch_size(n).prescale(value).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x56,
          xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
    }
  }
}
6052 
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X56, alpha) {
  TEST_REQUIRES_X86_AVX2;
  // Two alpha settings, each at batch sizes 1, 56, 111, ... (stride 55, up to 280).
  const std::vector<float> alpha_values{0.3f, 3.0f};
  for (const float value : alpha_values) {
    for (size_t n = 1; n <= 280; n += 55) {
      VUnaryMicrokernelTester().batch_size(n).alpha(value).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x56,
          xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
    }
  }
}
6064 
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X56, beta) {
  TEST_REQUIRES_X86_AVX2;
  // Two beta settings, each at batch sizes 1, 56, 111, ... (stride 55, up to 280).
  const std::vector<float> beta_values{0.3f, 3.0f};
  for (const float value : beta_values) {
    for (size_t n = 1; n <= 280; n += 55) {
      VUnaryMicrokernelTester().batch_size(n).beta(value).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x56,
          xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
    }
  }
}
6076 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
6077 
6078 
6079 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X64, batch_eq_64) {
  TEST_REQUIRES_X86_AVX2;
  // Single run on a batch of exactly 64 elements.
  VUnaryMicrokernelTester().batch_size(64).Test(
      xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x64,
      xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
}
6086 
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X64, batch_div_64) {
  TEST_REQUIRES_X86_AVX2;
  // Batch sizes 128, 192, ..., 576: multiples 2 through 9 of 64.
  for (size_t multiple = 2; multiple < 10; multiple++) {
    VUnaryMicrokernelTester().batch_size(multiple * 64).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x64,
        xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
  }
}
6095 
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X64, batch_lt_64) {
  TEST_REQUIRES_X86_AVX2;
  // Every batch size from 1 through 63.
  for (size_t n = 1; n <= 63; n++) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x64,
        xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
  }
}
6104 
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X64, batch_gt_64) {
  TEST_REQUIRES_X86_AVX2;
  // Every batch size from 65 through 127, expressed as 64 plus an offset.
  for (size_t extra = 1; extra < 64; extra++) {
    VUnaryMicrokernelTester().batch_size(64 + extra).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x64,
        xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
  }
}
6113 
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X64, inplace) {
  TEST_REQUIRES_X86_AVX2;
  // inplace(true) at batch sizes 1, 64, 127, ... (stride 63, up to 320).
  for (size_t n = 1; n <= 320; n += 63) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x64,
        xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
  }
}
6123 
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X64, prescale) {
  TEST_REQUIRES_X86_AVX2;
  // Two prescale settings, each at batch sizes 1, 64, 127, ... (stride 63, up to 320).
  const std::vector<float> prescale_values{0.1f, 10.0f};
  for (const float value : prescale_values) {
    for (size_t n = 1; n <= 320; n += 63) {
      VUnaryMicrokernelTester().batch_size(n).prescale(value).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x64,
          xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
    }
  }
}
6135 
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X64, alpha) {
  TEST_REQUIRES_X86_AVX2;
  // Two alpha settings, each at batch sizes 1, 64, 127, ... (stride 63, up to 320).
  const std::vector<float> alpha_values{0.3f, 3.0f};
  for (const float value : alpha_values) {
    for (size_t n = 1; n <= 320; n += 63) {
      VUnaryMicrokernelTester().batch_size(n).alpha(value).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x64,
          xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
    }
  }
}
6147 
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X64, beta) {
  TEST_REQUIRES_X86_AVX2;
  // Two beta settings, each at batch sizes 1, 64, 127, ... (stride 63, up to 320).
  const std::vector<float> beta_values{0.3f, 3.0f};
  for (const float value : beta_values) {
    for (size_t n = 1; n <= 320; n += 63) {
      VUnaryMicrokernelTester().batch_size(n).beta(value).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x64,
          xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
    }
  }
}
6159 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
6160 
6161 
6162 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X72, batch_eq_72) {
  TEST_REQUIRES_X86_AVX2;
  // Single run on a batch of exactly 72 elements.
  VUnaryMicrokernelTester().batch_size(72).Test(
      xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x72,
      xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
}
6169 
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X72, batch_div_72) {
  TEST_REQUIRES_X86_AVX2;
  // Batch sizes 144, 216, ..., 648: multiples 2 through 9 of 72.
  for (size_t multiple = 2; multiple < 10; multiple++) {
    VUnaryMicrokernelTester().batch_size(multiple * 72).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x72,
        xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
  }
}
6178 
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X72, batch_lt_72) {
  TEST_REQUIRES_X86_AVX2;
  // Every batch size from 1 through 71.
  for (size_t n = 1; n <= 71; n++) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x72,
        xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
  }
}
6187 
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X72, batch_gt_72) {
  TEST_REQUIRES_X86_AVX2;
  // Every batch size from 73 through 143, expressed as 72 plus an offset.
  for (size_t extra = 1; extra < 72; extra++) {
    VUnaryMicrokernelTester().batch_size(72 + extra).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x72,
        xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
  }
}
6196 
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X72, inplace) {
  TEST_REQUIRES_X86_AVX2;
  // inplace(true) at batch sizes 1, 72, 143, ... (stride 71, up to 360).
  for (size_t n = 1; n <= 360; n += 71) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x72,
        xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
  }
}
6206 
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X72, prescale) {
  TEST_REQUIRES_X86_AVX2;
  // Two prescale settings, each at batch sizes 1, 72, 143, ... (stride 71, up to 360).
  const std::vector<float> prescale_values{0.1f, 10.0f};
  for (const float value : prescale_values) {
    for (size_t n = 1; n <= 360; n += 71) {
      VUnaryMicrokernelTester().batch_size(n).prescale(value).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x72,
          xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
    }
  }
}
6218 
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X72, alpha) {
  TEST_REQUIRES_X86_AVX2;
  // Two alpha settings, each at batch sizes 1, 72, 143, ... (stride 71, up to 360).
  const std::vector<float> alpha_values{0.3f, 3.0f};
  for (const float value : alpha_values) {
    for (size_t n = 1; n <= 360; n += 71) {
      VUnaryMicrokernelTester().batch_size(n).alpha(value).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x72,
          xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
    }
  }
}
6230 
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X72, beta) {
  TEST_REQUIRES_X86_AVX2;
  // Two beta settings, each at batch sizes 1, 72, 143, ... (stride 71, up to 360).
  const std::vector<float> beta_values{0.3f, 3.0f};
  for (const float value : beta_values) {
    for (size_t n = 1; n <= 360; n += 71) {
      VUnaryMicrokernelTester().batch_size(n).beta(value).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x72,
          xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
    }
  }
}
6242 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
6243 
6244 
6245 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X80, batch_eq_80) {
  TEST_REQUIRES_X86_AVX2;
  // Single run on a batch of exactly 80 elements.
  VUnaryMicrokernelTester().batch_size(80).Test(
      xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x80,
      xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
}
6252 
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X80, batch_div_80) {
  TEST_REQUIRES_X86_AVX2;
  // Batch sizes 160, 240, ..., 720: multiples 2 through 9 of 80.
  for (size_t multiple = 2; multiple < 10; multiple++) {
    VUnaryMicrokernelTester().batch_size(multiple * 80).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x80,
        xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
  }
}
6261 
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X80, batch_lt_80) {
  TEST_REQUIRES_X86_AVX2;
  // Every batch size from 1 through 79.
  for (size_t n = 1; n <= 79; n++) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x80,
        xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
  }
}
6270 
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X80, batch_gt_80) {
  TEST_REQUIRES_X86_AVX2;
  // Every batch size from 81 through 159, expressed as 80 plus an offset.
  for (size_t extra = 1; extra < 80; extra++) {
    VUnaryMicrokernelTester().batch_size(80 + extra).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x80,
        xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
  }
}
6279 
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X80, inplace) {
  TEST_REQUIRES_X86_AVX2;
  // inplace(true) at batch sizes 1, 80, 159, ... (stride 79, up to 400).
  for (size_t n = 1; n <= 400; n += 79) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x80,
        xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
  }
}
6289 
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X80, prescale) {
  TEST_REQUIRES_X86_AVX2;
  // Two prescale settings, each at batch sizes 1, 80, 159, ... (stride 79, up to 400).
  const std::vector<float> prescale_values{0.1f, 10.0f};
  for (const float value : prescale_values) {
    for (size_t n = 1; n <= 400; n += 79) {
      VUnaryMicrokernelTester().batch_size(n).prescale(value).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x80,
          xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
    }
  }
}
6301 
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X80, alpha) {
  TEST_REQUIRES_X86_AVX2;
  // Two alpha settings, each at batch sizes 1, 80, 159, ... (stride 79, up to 400).
  const std::vector<float> alpha_values{0.3f, 3.0f};
  for (const float value : alpha_values) {
    for (size_t n = 1; n <= 400; n += 79) {
      VUnaryMicrokernelTester().batch_size(n).alpha(value).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x80,
          xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
    }
  }
}
6313 
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X80, beta) {
  TEST_REQUIRES_X86_AVX2;
  // Two beta settings, each at batch sizes 1, 80, 159, ... (stride 79, up to 400).
  const std::vector<float> beta_values{0.3f, 3.0f};
  for (const float value : beta_values) {
    for (size_t n = 1; n <= 400; n += 79) {
      VUnaryMicrokernelTester().batch_size(n).beta(value).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x80,
          xnn_init_f32_elu_avx2_rr1_lut4_p4_params);
    }
  }
}
6325 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
6326 
6327 
6328 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X8, batch_eq_8) {
  TEST_REQUIRES_X86_AVX2;
  // Single run on a batch of exactly 8 elements.
  VUnaryMicrokernelTester().batch_size(8).Test(
      xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x8,
      xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
}
6335 
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X8, batch_div_8) {
  TEST_REQUIRES_X86_AVX2;
  // Batch sizes 16, 24, ..., 72: multiples 2 through 9 of 8.
  for (size_t multiple = 2; multiple < 10; multiple++) {
    VUnaryMicrokernelTester().batch_size(multiple * 8).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x8,
        xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
  }
}
6344 
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X8, batch_lt_8) {
  TEST_REQUIRES_X86_AVX2;
  // Every batch size from 1 through 7.
  for (size_t n = 1; n <= 7; n++) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x8,
        xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
  }
}
6353 
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X8, batch_gt_8) {
  TEST_REQUIRES_X86_AVX2;
  // Every batch size from 9 through 15, expressed as 8 plus an offset.
  for (size_t extra = 1; extra < 8; extra++) {
    VUnaryMicrokernelTester().batch_size(8 + extra).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x8,
        xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
  }
}
6362 
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X8, inplace) {
  TEST_REQUIRES_X86_AVX2;
  // inplace(true) at batch sizes 1, 8, 15, ... (stride 7, up to 40).
  for (size_t n = 1; n <= 40; n += 7) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x8,
        xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
  }
}
6372 
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X8, prescale) {
  TEST_REQUIRES_X86_AVX2;
  // Two prescale settings, each at batch sizes 1, 8, 15, ... (stride 7, up to 40).
  const std::vector<float> prescale_values{0.1f, 10.0f};
  for (const float value : prescale_values) {
    for (size_t n = 1; n <= 40; n += 7) {
      VUnaryMicrokernelTester().batch_size(n).prescale(value).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x8,
          xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
    }
  }
}
6384 
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X8, alpha) {
  TEST_REQUIRES_X86_AVX2;
  // Two alpha settings, each at batch sizes 1, 8, 15, ... (stride 7, up to 40).
  const std::vector<float> alpha_values{0.3f, 3.0f};
  for (const float value : alpha_values) {
    for (size_t n = 1; n <= 40; n += 7) {
      VUnaryMicrokernelTester().batch_size(n).alpha(value).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x8,
          xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
    }
  }
}
6396 
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X8, beta) {
  TEST_REQUIRES_X86_AVX2;
  // Two beta settings, each at batch sizes 1, 8, 15, ... (stride 7, up to 40).
  const std::vector<float> beta_values{0.3f, 3.0f};
  for (const float value : beta_values) {
    for (size_t n = 1; n <= 40; n += 7) {
      VUnaryMicrokernelTester().batch_size(n).beta(value).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x8,
          xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
    }
  }
}
6408 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
6409 
6410 
6411 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X16, batch_eq_16) {
  TEST_REQUIRES_X86_AVX2;
  // Single run on a batch of exactly 16 elements.
  VUnaryMicrokernelTester().batch_size(16).Test(
      xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x16,
      xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
}
6418 
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X16, batch_div_16) {
  TEST_REQUIRES_X86_AVX2;
  // Batch sizes 32, 48, ..., 144: multiples 2 through 9 of 16.
  for (size_t multiple = 2; multiple < 10; multiple++) {
    VUnaryMicrokernelTester().batch_size(multiple * 16).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x16,
        xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
  }
}
6427 
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X16, batch_lt_16) {
  TEST_REQUIRES_X86_AVX2;
  // Every batch size from 1 through 15.
  for (size_t n = 1; n <= 15; n++) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x16,
        xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
  }
}
6436 
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X16, batch_gt_16) {
  TEST_REQUIRES_X86_AVX2;
  // Every batch size from 17 through 31, expressed as 16 plus an offset.
  for (size_t extra = 1; extra < 16; extra++) {
    VUnaryMicrokernelTester().batch_size(16 + extra).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x16,
        xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
  }
}
6445 
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X16, inplace) {
  TEST_REQUIRES_X86_AVX2;
  // inplace(true) at batch sizes 1, 16, 31, ... (stride 15, up to 80).
  for (size_t n = 1; n <= 80; n += 15) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x16,
        xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
  }
}
6455 
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X16, prescale) {
  TEST_REQUIRES_X86_AVX2;
  // Two prescale settings, each at batch sizes 1, 16, 31, ... (stride 15, up to 80).
  const std::vector<float> prescale_values{0.1f, 10.0f};
  for (const float value : prescale_values) {
    for (size_t n = 1; n <= 80; n += 15) {
      VUnaryMicrokernelTester().batch_size(n).prescale(value).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x16,
          xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
    }
  }
}
6467 
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X16, alpha) {
  TEST_REQUIRES_X86_AVX2;
  // Two alpha settings, each at batch sizes 1, 16, 31, ... (stride 15, up to 80).
  const std::vector<float> alpha_values{0.3f, 3.0f};
  for (const float value : alpha_values) {
    for (size_t n = 1; n <= 80; n += 15) {
      VUnaryMicrokernelTester().batch_size(n).alpha(value).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x16,
          xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
    }
  }
}
6479 
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X16, beta) {
  TEST_REQUIRES_X86_AVX2;
  // Two beta settings, each at batch sizes 1, 16, 31, ... (stride 15, up to 80).
  const std::vector<float> beta_values{0.3f, 3.0f};
  for (const float value : beta_values) {
    for (size_t n = 1; n <= 80; n += 15) {
      VUnaryMicrokernelTester().batch_size(n).beta(value).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x16,
          xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
    }
  }
}
6491 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
6492 
6493 
6494 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X24, batch_eq_24) {
  TEST_REQUIRES_X86_AVX2;
  // Single run on a batch of exactly 24 elements.
  VUnaryMicrokernelTester().batch_size(24).Test(
      xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x24,
      xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
}
6501 
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X24, batch_div_24) {
  TEST_REQUIRES_X86_AVX2;
  // Batch sizes 48, 72, ..., 216: multiples 2 through 9 of 24.
  for (size_t multiple = 2; multiple < 10; multiple++) {
    VUnaryMicrokernelTester().batch_size(multiple * 24).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x24,
        xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
  }
}
6510 
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X24, batch_lt_24) {
  TEST_REQUIRES_X86_AVX2;
  // Every batch size from 1 through 23.
  for (size_t n = 1; n <= 23; n++) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x24,
        xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
  }
}
6519 
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X24, batch_gt_24) {
  TEST_REQUIRES_X86_AVX2;
  // Every batch size from 25 through 47, expressed as 24 plus an offset.
  for (size_t extra = 1; extra < 24; extra++) {
    VUnaryMicrokernelTester().batch_size(24 + extra).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x24,
        xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
  }
}
6528 
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X24, inplace) {
  TEST_REQUIRES_X86_AVX2;
  // inplace(true) at batch sizes 1, 24, 47, ... (stride 23, up to 120).
  for (size_t n = 1; n <= 120; n += 23) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x24,
        xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
  }
}
6538 
// Prescale values 0.1 and 10, each over batch sizes 1 up to 120 in steps of 23.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X24, prescale) {
  TEST_REQUIRES_X86_AVX2;
  const std::vector<float> prescales{0.1f, 10.0f};
  for (const float p : prescales) {
    for (size_t n = 1; n <= 120; n += 23) {
      VUnaryMicrokernelTester().batch_size(n).prescale(p).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x24,
          xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
    }
  }
}
6550 
// Alpha values 0.3 and 3, each over batch sizes 1 up to 120 in steps of 23.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X24, alpha) {
  TEST_REQUIRES_X86_AVX2;
  const std::vector<float> alphas{0.3f, 3.0f};
  for (const float a : alphas) {
    for (size_t n = 1; n <= 120; n += 23) {
      VUnaryMicrokernelTester().batch_size(n).alpha(a).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x24,
          xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
    }
  }
}
6562 
// Beta values 0.3 and 3, each over batch sizes 1 up to 120 in steps of 23.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X24, beta) {
  TEST_REQUIRES_X86_AVX2;
  const std::vector<float> betas{0.3f, 3.0f};
  for (const float b : betas) {
    for (size_t n = 1; n <= 120; n += 23) {
      VUnaryMicrokernelTester().batch_size(n).beta(b).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x24,
          xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
    }
  }
}
6574 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
6575 
6576 
6577 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run of the perm_x32 kernel with batch_size equal to 32.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X32, batch_eq_32) {
  TEST_REQUIRES_X86_AVX2;
  const size_t tile = 32;
  VUnaryMicrokernelTester().batch_size(tile).Test(
      xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x32,
      xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
}
6584 
// Batch sizes 64 through 288 in steps of 32.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X32, batch_div_32) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 64; n < 320; n += 32) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x32,
        xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
  }
}
6593 
// Every batch size from 1 through 31.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X32, batch_lt_32) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n < 32; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x32,
        xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
  }
}
6602 
// Every batch size from 33 through 63.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X32, batch_gt_32) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 33; n < 64; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x32,
        xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
  }
}
6611 
// Sets inplace(true) for batch sizes from 1 up to 160 in steps of 31.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X32, inplace) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 160; n += 31) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x32,
        xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
  }
}
6621 
// Prescale values 0.1 and 10, each over batch sizes 1 up to 160 in steps of 31.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X32, prescale) {
  TEST_REQUIRES_X86_AVX2;
  const std::vector<float> prescales{0.1f, 10.0f};
  for (const float p : prescales) {
    for (size_t n = 1; n <= 160; n += 31) {
      VUnaryMicrokernelTester().batch_size(n).prescale(p).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x32,
          xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
    }
  }
}
6633 
// Alpha values 0.3 and 3, each over batch sizes 1 up to 160 in steps of 31.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X32, alpha) {
  TEST_REQUIRES_X86_AVX2;
  const std::vector<float> alphas{0.3f, 3.0f};
  for (const float a : alphas) {
    for (size_t n = 1; n <= 160; n += 31) {
      VUnaryMicrokernelTester().batch_size(n).alpha(a).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x32,
          xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
    }
  }
}
6645 
// Beta values 0.3 and 3, each over batch sizes 1 up to 160 in steps of 31.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X32, beta) {
  TEST_REQUIRES_X86_AVX2;
  const std::vector<float> betas{0.3f, 3.0f};
  for (const float b : betas) {
    for (size_t n = 1; n <= 160; n += 31) {
      VUnaryMicrokernelTester().batch_size(n).beta(b).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x32,
          xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
    }
  }
}
6657 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
6658 
6659 
6660 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run of the perm_x40 kernel with batch_size equal to 40.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X40, batch_eq_40) {
  TEST_REQUIRES_X86_AVX2;
  const size_t tile = 40;
  VUnaryMicrokernelTester().batch_size(tile).Test(
      xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x40,
      xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
}
6667 
// Batch sizes 80 through 360 in steps of 40.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X40, batch_div_40) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 80; n < 400; n += 40) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x40,
        xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
  }
}
6676 
// Every batch size from 1 through 39.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X40, batch_lt_40) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n < 40; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x40,
        xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
  }
}
6685 
// Every batch size from 41 through 79.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X40, batch_gt_40) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 41; n < 80; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x40,
        xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
  }
}
6694 
// Sets inplace(true) for batch sizes from 1 up to 200 in steps of 39.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X40, inplace) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 200; n += 39) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x40,
        xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
  }
}
6704 
// Prescale values 0.1 and 10, each over batch sizes 1 up to 200 in steps of 39.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X40, prescale) {
  TEST_REQUIRES_X86_AVX2;
  const std::vector<float> prescales{0.1f, 10.0f};
  for (const float p : prescales) {
    for (size_t n = 1; n <= 200; n += 39) {
      VUnaryMicrokernelTester().batch_size(n).prescale(p).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x40,
          xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
    }
  }
}
6716 
// Alpha values 0.3 and 3, each over batch sizes 1 up to 200 in steps of 39.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X40, alpha) {
  TEST_REQUIRES_X86_AVX2;
  const std::vector<float> alphas{0.3f, 3.0f};
  for (const float a : alphas) {
    for (size_t n = 1; n <= 200; n += 39) {
      VUnaryMicrokernelTester().batch_size(n).alpha(a).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x40,
          xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
    }
  }
}
6728 
// Beta values 0.3 and 3, each over batch sizes 1 up to 200 in steps of 39.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X40, beta) {
  TEST_REQUIRES_X86_AVX2;
  const std::vector<float> betas{0.3f, 3.0f};
  for (const float b : betas) {
    for (size_t n = 1; n <= 200; n += 39) {
      VUnaryMicrokernelTester().batch_size(n).beta(b).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x40,
          xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
    }
  }
}
6740 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
6741 
6742 
6743 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run of the perm_x48 kernel with batch_size equal to 48.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X48, batch_eq_48) {
  TEST_REQUIRES_X86_AVX2;
  const size_t tile = 48;
  VUnaryMicrokernelTester().batch_size(tile).Test(
      xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x48,
      xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
}
6750 
// Batch sizes 96 through 432 in steps of 48.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X48, batch_div_48) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 96; n < 480; n += 48) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x48,
        xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
  }
}
6759 
// Every batch size from 1 through 47.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X48, batch_lt_48) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n < 48; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x48,
        xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
  }
}
6768 
// Every batch size from 49 through 95.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X48, batch_gt_48) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 49; n < 96; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x48,
        xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
  }
}
6777 
// Sets inplace(true) for batch sizes from 1 up to 240 in steps of 47.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X48, inplace) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 240; n += 47) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x48,
        xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
  }
}
6787 
// Prescale values 0.1 and 10, each over batch sizes 1 up to 240 in steps of 47.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X48, prescale) {
  TEST_REQUIRES_X86_AVX2;
  const std::vector<float> prescales{0.1f, 10.0f};
  for (const float p : prescales) {
    for (size_t n = 1; n <= 240; n += 47) {
      VUnaryMicrokernelTester().batch_size(n).prescale(p).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x48,
          xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
    }
  }
}
6799 
// Alpha values 0.3 and 3, each over batch sizes 1 up to 240 in steps of 47.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X48, alpha) {
  TEST_REQUIRES_X86_AVX2;
  const std::vector<float> alphas{0.3f, 3.0f};
  for (const float a : alphas) {
    for (size_t n = 1; n <= 240; n += 47) {
      VUnaryMicrokernelTester().batch_size(n).alpha(a).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x48,
          xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
    }
  }
}
6811 
// Beta values 0.3 and 3, each over batch sizes 1 up to 240 in steps of 47.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X48, beta) {
  TEST_REQUIRES_X86_AVX2;
  const std::vector<float> betas{0.3f, 3.0f};
  for (const float b : betas) {
    for (size_t n = 1; n <= 240; n += 47) {
      VUnaryMicrokernelTester().batch_size(n).beta(b).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x48,
          xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
    }
  }
}
6823 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
6824 
6825 
6826 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run of the perm_x56 kernel with batch_size equal to 56.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X56, batch_eq_56) {
  TEST_REQUIRES_X86_AVX2;
  const size_t tile = 56;
  VUnaryMicrokernelTester().batch_size(tile).Test(
      xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x56,
      xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
}
6833 
// Batch sizes 112 through 504 in steps of 56.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X56, batch_div_56) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 112; n < 560; n += 56) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x56,
        xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
  }
}
6842 
// Every batch size from 1 through 55.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X56, batch_lt_56) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n < 56; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x56,
        xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
  }
}
6851 
// Every batch size from 57 through 111.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X56, batch_gt_56) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 57; n < 112; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x56,
        xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
  }
}
6860 
// Sets inplace(true) for batch sizes from 1 up to 280 in steps of 55.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X56, inplace) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 280; n += 55) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x56,
        xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
  }
}
6870 
// Prescale values 0.1 and 10, each over batch sizes 1 up to 280 in steps of 55.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X56, prescale) {
  TEST_REQUIRES_X86_AVX2;
  const std::vector<float> prescales{0.1f, 10.0f};
  for (const float p : prescales) {
    for (size_t n = 1; n <= 280; n += 55) {
      VUnaryMicrokernelTester().batch_size(n).prescale(p).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x56,
          xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
    }
  }
}
6882 
// Alpha values 0.3 and 3, each over batch sizes 1 up to 280 in steps of 55.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X56, alpha) {
  TEST_REQUIRES_X86_AVX2;
  const std::vector<float> alphas{0.3f, 3.0f};
  for (const float a : alphas) {
    for (size_t n = 1; n <= 280; n += 55) {
      VUnaryMicrokernelTester().batch_size(n).alpha(a).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x56,
          xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
    }
  }
}
6894 
// Beta values 0.3 and 3, each over batch sizes 1 up to 280 in steps of 55.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X56, beta) {
  TEST_REQUIRES_X86_AVX2;
  const std::vector<float> betas{0.3f, 3.0f};
  for (const float b : betas) {
    for (size_t n = 1; n <= 280; n += 55) {
      VUnaryMicrokernelTester().batch_size(n).beta(b).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x56,
          xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
    }
  }
}
6906 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
6907 
6908 
6909 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run of the perm_x64 kernel with batch_size equal to 64.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X64, batch_eq_64) {
  TEST_REQUIRES_X86_AVX2;
  const size_t tile = 64;
  VUnaryMicrokernelTester().batch_size(tile).Test(
      xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x64,
      xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
}
6916 
// Batch sizes 128 through 576 in steps of 64.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X64, batch_div_64) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 128; n < 640; n += 64) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x64,
        xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
  }
}
6925 
// Every batch size from 1 through 63.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X64, batch_lt_64) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n < 64; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x64,
        xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
  }
}
6934 
// Every batch size from 65 through 127.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X64, batch_gt_64) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 65; n < 128; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x64,
        xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
  }
}
6943 
// Sets inplace(true) for batch sizes from 1 up to 320 in steps of 63.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X64, inplace) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 320; n += 63) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x64,
        xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
  }
}
6953 
// Prescale values 0.1 and 10, each over batch sizes 1 up to 320 in steps of 63.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X64, prescale) {
  TEST_REQUIRES_X86_AVX2;
  const std::vector<float> prescales{0.1f, 10.0f};
  for (const float p : prescales) {
    for (size_t n = 1; n <= 320; n += 63) {
      VUnaryMicrokernelTester().batch_size(n).prescale(p).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x64,
          xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
    }
  }
}
6965 
// Alpha values 0.3 and 3, each over batch sizes 1 up to 320 in steps of 63.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X64, alpha) {
  TEST_REQUIRES_X86_AVX2;
  const std::vector<float> alphas{0.3f, 3.0f};
  for (const float a : alphas) {
    for (size_t n = 1; n <= 320; n += 63) {
      VUnaryMicrokernelTester().batch_size(n).alpha(a).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x64,
          xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
    }
  }
}
6977 
// Beta values 0.3 and 3, each over batch sizes 1 up to 320 in steps of 63.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X64, beta) {
  TEST_REQUIRES_X86_AVX2;
  const std::vector<float> betas{0.3f, 3.0f};
  for (const float b : betas) {
    for (size_t n = 1; n <= 320; n += 63) {
      VUnaryMicrokernelTester().batch_size(n).beta(b).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x64,
          xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
    }
  }
}
6989 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
6990 
6991 
6992 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run of the perm_x72 kernel with batch_size equal to 72.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X72, batch_eq_72) {
  TEST_REQUIRES_X86_AVX2;
  const size_t tile = 72;
  VUnaryMicrokernelTester().batch_size(tile).Test(
      xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x72,
      xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
}
6999 
// Batch sizes 144 through 648 in steps of 72.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X72, batch_div_72) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 144; n < 720; n += 72) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x72,
        xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
  }
}
7008 
// Every batch size from 1 through 71.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X72, batch_lt_72) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n < 72; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x72,
        xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
  }
}
7017 
// Every batch size from 73 through 143.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X72, batch_gt_72) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 73; n < 144; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x72,
        xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
  }
}
7026 
// Sets inplace(true) for batch sizes from 1 up to 360 in steps of 71.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X72, inplace) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 360; n += 71) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x72,
        xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
  }
}
7036 
// Prescale values 0.1 and 10, each over batch sizes 1 up to 360 in steps of 71.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X72, prescale) {
  TEST_REQUIRES_X86_AVX2;
  const std::vector<float> prescales{0.1f, 10.0f};
  for (const float p : prescales) {
    for (size_t n = 1; n <= 360; n += 71) {
      VUnaryMicrokernelTester().batch_size(n).prescale(p).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x72,
          xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
    }
  }
}
7048 
// Alpha values 0.3 and 3, each over batch sizes 1 up to 360 in steps of 71.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X72, alpha) {
  TEST_REQUIRES_X86_AVX2;
  const std::vector<float> alphas{0.3f, 3.0f};
  for (const float a : alphas) {
    for (size_t n = 1; n <= 360; n += 71) {
      VUnaryMicrokernelTester().batch_size(n).alpha(a).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x72,
          xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
    }
  }
}
7060 
// Beta values 0.3 and 3, each over batch sizes 1 up to 360 in steps of 71.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X72, beta) {
  TEST_REQUIRES_X86_AVX2;
  const std::vector<float> betas{0.3f, 3.0f};
  for (const float b : betas) {
    for (size_t n = 1; n <= 360; n += 71) {
      VUnaryMicrokernelTester().batch_size(n).beta(b).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x72,
          xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
    }
  }
}
7072 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
7073 
7074 
7075 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run of the perm_x80 kernel with batch_size equal to 80.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X80, batch_eq_80) {
  TEST_REQUIRES_X86_AVX2;
  const size_t tile = 80;
  VUnaryMicrokernelTester().batch_size(tile).Test(
      xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x80,
      xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
}
7082 
// Batch sizes 160 through 720 in steps of 80.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X80, batch_div_80) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 160; n < 800; n += 80) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x80,
        xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
  }
}
7091 
// Every batch size from 1 through 79.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X80, batch_lt_80) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n < 80; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x80,
        xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
  }
}
7100 
// Every batch size from 81 through 159.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X80, batch_gt_80) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 81; n < 160; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x80,
        xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
  }
}
7109 
// Sets inplace(true) for batch sizes from 1 up to 400 in steps of 79.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X80, inplace) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 400; n += 79) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x80,
        xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
  }
}
7119 
// Prescale values 0.1 and 10, each over batch sizes 1 up to 400 in steps of 79.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X80, prescale) {
  TEST_REQUIRES_X86_AVX2;
  const std::vector<float> prescales{0.1f, 10.0f};
  for (const float p : prescales) {
    for (size_t n = 1; n <= 400; n += 79) {
      VUnaryMicrokernelTester().batch_size(n).prescale(p).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x80,
          xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
    }
  }
}
7131 
// Alpha values 0.3 and 3, each over batch sizes 1 up to 400 in steps of 79.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X80, alpha) {
  TEST_REQUIRES_X86_AVX2;
  const std::vector<float> alphas{0.3f, 3.0f};
  for (const float a : alphas) {
    for (size_t n = 1; n <= 400; n += 79) {
      VUnaryMicrokernelTester().batch_size(n).alpha(a).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x80,
          xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
    }
  }
}
7143 
// Beta values 0.3 and 3, each over batch sizes 1 up to 400 in steps of 79.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X80, beta) {
  TEST_REQUIRES_X86_AVX2;
  const std::vector<float> betas{0.3f, 3.0f};
  for (const float b : betas) {
    for (size_t n = 1; n <= 400; n += 79) {
      VUnaryMicrokernelTester().batch_size(n).beta(b).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x80,
          xnn_init_f32_elu_avx2_rr1_lut8_p4_params);
    }
  }
}
7155 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
7156 
7157 
7158 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run of the gather_x8 kernel with batch_size equal to 8.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X8, batch_eq_8) {
  TEST_REQUIRES_X86_AVX2;
  const size_t tile = 8;
  VUnaryMicrokernelTester().batch_size(tile).Test(
      xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x8,
      xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
}
7165 
// Batch sizes 16 through 72 in steps of 8.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X8, batch_div_8) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 16; n < 80; n += 8) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x8,
        xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
  }
}
7174 
// Every batch size from 1 through 7.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X8, batch_lt_8) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n < 8; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x8,
        xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
  }
}
7183 
// Every batch size from 9 through 15.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X8, batch_gt_8) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 9; n < 16; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x8,
        xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
  }
}
7192 
// Sets inplace(true) for batch sizes from 1 up to 40 in steps of 7.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X8, inplace) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 40; n += 7) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x8,
        xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
  }
}
7202 
// Prescale values 0.1 and 10, each over batch sizes 1 up to 40 in steps of 7.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X8, prescale) {
  TEST_REQUIRES_X86_AVX2;
  const std::vector<float> prescales{0.1f, 10.0f};
  for (const float p : prescales) {
    for (size_t n = 1; n <= 40; n += 7) {
      VUnaryMicrokernelTester().batch_size(n).prescale(p).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x8,
          xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
    }
  }
}
7214 
// Alpha values 0.3 and 3, each over batch sizes 1 up to 40 in steps of 7.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X8, alpha) {
  TEST_REQUIRES_X86_AVX2;
  const std::vector<float> alphas{0.3f, 3.0f};
  for (const float a : alphas) {
    for (size_t n = 1; n <= 40; n += 7) {
      VUnaryMicrokernelTester().batch_size(n).alpha(a).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x8,
          xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
    }
  }
}
7226 
// Beta values 0.3 and 3, each over batch sizes 1 up to 40 in steps of 7.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X8, beta) {
  TEST_REQUIRES_X86_AVX2;
  const std::vector<float> betas{0.3f, 3.0f};
  for (const float b : betas) {
    for (size_t n = 1; n <= 40; n += 7) {
      VUnaryMicrokernelTester().batch_size(n).beta(b).Test(
          xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x8,
          xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
    }
  }
}
7238 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
7239 
7240 
7241 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the x16 kernel once with batch_size == 16, the element count named in
// the kernel's suffix.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X16, batch_eq_16) {
  TEST_REQUIRES_X86_AVX2;  // ISA guard; presumably bails out when AVX2 is unavailable.
  VUnaryMicrokernelTester()
    .batch_size(16)
    .Test(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x16, xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
}
7248 
// Sweeps batch_size over the multiples of 16 from 32 up to, but not
// including, 160.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X16, batch_div_16) {
  TEST_REQUIRES_X86_AVX2;  // ISA guard; presumably bails out when AVX2 is unavailable.
  for (size_t batch_size = 32; batch_size < 160; batch_size += 16) {
    VUnaryMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x16, xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
  }
}
7257 
// Sweeps batch_size over every value from 1 to 15, i.e. below the 16 named in
// the kernel's suffix.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X16, batch_lt_16) {
  TEST_REQUIRES_X86_AVX2;  // ISA guard; presumably bails out when AVX2 is unavailable.
  for (size_t batch_size = 1; batch_size < 16; batch_size++) {
    VUnaryMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x16, xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
  }
}
7266 
// Sweeps batch_size over every value from 17 to 31, i.e. strictly between the
// 16 named in the kernel's suffix and twice that.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X16, batch_gt_16) {
  TEST_REQUIRES_X86_AVX2;  // ISA guard; presumably bails out when AVX2 is unavailable.
  for (size_t batch_size = 17; batch_size < 32; batch_size++) {
    VUnaryMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x16, xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
  }
}
7275 
// Sweeps batch_size from 1 up to 80 in steps of 15 with the tester's inplace
// flag set.
// NOTE(review): presumably the tester then uses one buffer as both input and
// output -- confirm in vunary-microkernel-tester.h.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X16, inplace) {
  TEST_REQUIRES_X86_AVX2;  // ISA guard; presumably bails out when AVX2 is unavailable.
  for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
    VUnaryMicrokernelTester()
      .batch_size(batch_size)
      .inplace(true)
      .Test(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x16, xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
  }
}
7285 
// For prescale values 0.1 and 10, sweeps batch_size from 1 up to 80 in steps
// of 15. No other tester setting is overridden here.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X16, prescale) {
  TEST_REQUIRES_X86_AVX2;  // ISA guard; presumably bails out when AVX2 is unavailable.
  for (float prescale : std::vector<float>({0.1f, 10.0f})) {
    for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .prescale(prescale)
        .Test(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x16, xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
    }
  }
}
7297 
// For alpha values 0.3 and 3, sweeps batch_size from 1 up to 80 in steps of
// 15. No other tester setting is overridden here.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X16, alpha) {
  TEST_REQUIRES_X86_AVX2;  // ISA guard; presumably bails out when AVX2 is unavailable.
  for (float alpha : std::vector<float>({0.3f, 3.0f})) {
    for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .alpha(alpha)
        .Test(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x16, xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
    }
  }
}
7309 
// For beta values 0.3 and 3, sweeps batch_size from 1 up to 80 in steps of 15.
// No other tester setting is overridden here.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X16, beta) {
  TEST_REQUIRES_X86_AVX2;  // ISA guard; presumably bails out when AVX2 is unavailable.
  for (float beta : std::vector<float>({0.3f, 3.0f})) {
    for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .beta(beta)
        .Test(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x16, xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
    }
  }
}
7321 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
7322 
7323 
7324 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the x24 kernel once with batch_size == 24, the element count named in
// the kernel's suffix.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X24, batch_eq_24) {
  TEST_REQUIRES_X86_AVX2;  // ISA guard; presumably bails out when AVX2 is unavailable.
  VUnaryMicrokernelTester()
    .batch_size(24)
    .Test(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x24, xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
}
7331 
// Sweeps batch_size over the multiples of 24 from 48 up to, but not
// including, 240.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X24, batch_div_24) {
  TEST_REQUIRES_X86_AVX2;  // ISA guard; presumably bails out when AVX2 is unavailable.
  for (size_t batch_size = 48; batch_size < 240; batch_size += 24) {
    VUnaryMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x24, xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
  }
}
7340 
// Sweeps batch_size over every value from 1 to 23, i.e. below the 24 named in
// the kernel's suffix.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X24, batch_lt_24) {
  TEST_REQUIRES_X86_AVX2;  // ISA guard; presumably bails out when AVX2 is unavailable.
  for (size_t batch_size = 1; batch_size < 24; batch_size++) {
    VUnaryMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x24, xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
  }
}
7349 
// Sweeps batch_size over every value from 25 to 47, i.e. strictly between the
// 24 named in the kernel's suffix and twice that.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X24, batch_gt_24) {
  TEST_REQUIRES_X86_AVX2;  // ISA guard; presumably bails out when AVX2 is unavailable.
  for (size_t batch_size = 25; batch_size < 48; batch_size++) {
    VUnaryMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x24, xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
  }
}
7358 
// Sweeps batch_size from 1 up to 120 in steps of 23 with the tester's inplace
// flag set.
// NOTE(review): presumably the tester then uses one buffer as both input and
// output -- confirm in vunary-microkernel-tester.h.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X24, inplace) {
  TEST_REQUIRES_X86_AVX2;  // ISA guard; presumably bails out when AVX2 is unavailable.
  for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
    VUnaryMicrokernelTester()
      .batch_size(batch_size)
      .inplace(true)
      .Test(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x24, xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
  }
}
7368 
// For prescale values 0.1 and 10, sweeps batch_size from 1 up to 120 in steps
// of 23. No other tester setting is overridden here.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X24, prescale) {
  TEST_REQUIRES_X86_AVX2;  // ISA guard; presumably bails out when AVX2 is unavailable.
  for (float prescale : std::vector<float>({0.1f, 10.0f})) {
    for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .prescale(prescale)
        .Test(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x24, xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
    }
  }
}
7380 
// For alpha values 0.3 and 3, sweeps batch_size from 1 up to 120 in steps of
// 23. No other tester setting is overridden here.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X24, alpha) {
  TEST_REQUIRES_X86_AVX2;  // ISA guard; presumably bails out when AVX2 is unavailable.
  for (float alpha : std::vector<float>({0.3f, 3.0f})) {
    for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .alpha(alpha)
        .Test(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x24, xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
    }
  }
}
7392 
// For beta values 0.3 and 3, sweeps batch_size from 1 up to 120 in steps of
// 23. No other tester setting is overridden here.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X24, beta) {
  TEST_REQUIRES_X86_AVX2;  // ISA guard; presumably bails out when AVX2 is unavailable.
  for (float beta : std::vector<float>({0.3f, 3.0f})) {
    for (size_t batch_size = 1; batch_size <= 120; batch_size += 23) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .beta(beta)
        .Test(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x24, xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
    }
  }
}
7404 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
7405 
7406 
7407 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the x32 kernel once with batch_size == 32, the element count named in
// the kernel's suffix.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X32, batch_eq_32) {
  TEST_REQUIRES_X86_AVX2;  // ISA guard; presumably bails out when AVX2 is unavailable.
  VUnaryMicrokernelTester()
    .batch_size(32)
    .Test(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x32, xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
}
7414 
// Sweeps batch_size over the multiples of 32 from 64 up to, but not
// including, 320.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X32, batch_div_32) {
  TEST_REQUIRES_X86_AVX2;  // ISA guard; presumably bails out when AVX2 is unavailable.
  for (size_t batch_size = 64; batch_size < 320; batch_size += 32) {
    VUnaryMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x32, xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
  }
}
7423 
// Sweeps batch_size over every value from 1 to 31, i.e. below the 32 named in
// the kernel's suffix.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X32, batch_lt_32) {
  TEST_REQUIRES_X86_AVX2;  // ISA guard; presumably bails out when AVX2 is unavailable.
  for (size_t batch_size = 1; batch_size < 32; batch_size++) {
    VUnaryMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x32, xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
  }
}
7432 
// Sweeps batch_size over every value from 33 to 63, i.e. strictly between the
// 32 named in the kernel's suffix and twice that.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X32, batch_gt_32) {
  TEST_REQUIRES_X86_AVX2;  // ISA guard; presumably bails out when AVX2 is unavailable.
  for (size_t batch_size = 33; batch_size < 64; batch_size++) {
    VUnaryMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x32, xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
  }
}
7441 
// Sweeps batch_size from 1 up to 160 in steps of 31 with the tester's inplace
// flag set.
// NOTE(review): presumably the tester then uses one buffer as both input and
// output -- confirm in vunary-microkernel-tester.h.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X32, inplace) {
  TEST_REQUIRES_X86_AVX2;  // ISA guard; presumably bails out when AVX2 is unavailable.
  for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
    VUnaryMicrokernelTester()
      .batch_size(batch_size)
      .inplace(true)
      .Test(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x32, xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
  }
}
7451 
// For prescale values 0.1 and 10, sweeps batch_size from 1 up to 160 in steps
// of 31. No other tester setting is overridden here.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X32, prescale) {
  TEST_REQUIRES_X86_AVX2;  // ISA guard; presumably bails out when AVX2 is unavailable.
  for (float prescale : std::vector<float>({0.1f, 10.0f})) {
    for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .prescale(prescale)
        .Test(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x32, xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
    }
  }
}
7463 
// For alpha values 0.3 and 3, sweeps batch_size from 1 up to 160 in steps of
// 31. No other tester setting is overridden here.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X32, alpha) {
  TEST_REQUIRES_X86_AVX2;  // ISA guard; presumably bails out when AVX2 is unavailable.
  for (float alpha : std::vector<float>({0.3f, 3.0f})) {
    for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .alpha(alpha)
        .Test(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x32, xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
    }
  }
}
7475 
// For beta values 0.3 and 3, sweeps batch_size from 1 up to 160 in steps of
// 31. No other tester setting is overridden here.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X32, beta) {
  TEST_REQUIRES_X86_AVX2;  // ISA guard; presumably bails out when AVX2 is unavailable.
  for (float beta : std::vector<float>({0.3f, 3.0f})) {
    for (size_t batch_size = 1; batch_size <= 160; batch_size += 31) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .beta(beta)
        .Test(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x32, xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
    }
  }
}
7487 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
7488 
7489 
7490 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the x40 kernel once with batch_size == 40, the element count named in
// the kernel's suffix.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X40, batch_eq_40) {
  TEST_REQUIRES_X86_AVX2;  // ISA guard; presumably bails out when AVX2 is unavailable.
  VUnaryMicrokernelTester()
    .batch_size(40)
    .Test(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x40, xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
}
7497 
// Sweeps batch_size over the multiples of 40 from 80 up to, but not
// including, 400.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X40, batch_div_40) {
  TEST_REQUIRES_X86_AVX2;  // ISA guard; presumably bails out when AVX2 is unavailable.
  for (size_t batch_size = 80; batch_size < 400; batch_size += 40) {
    VUnaryMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x40, xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
  }
}
7506 
// Sweeps batch_size over every value from 1 to 39, i.e. below the 40 named in
// the kernel's suffix.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X40, batch_lt_40) {
  TEST_REQUIRES_X86_AVX2;  // ISA guard; presumably bails out when AVX2 is unavailable.
  for (size_t batch_size = 1; batch_size < 40; batch_size++) {
    VUnaryMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x40, xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
  }
}
7515 
// Sweeps batch_size over every value from 41 to 79, i.e. strictly between the
// 40 named in the kernel's suffix and twice that.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X40, batch_gt_40) {
  TEST_REQUIRES_X86_AVX2;  // ISA guard; presumably bails out when AVX2 is unavailable.
  for (size_t batch_size = 41; batch_size < 80; batch_size++) {
    VUnaryMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x40, xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
  }
}
7524 
// Sweeps batch_size from 1 up to 200 in steps of 39 with the tester's inplace
// flag set.
// NOTE(review): presumably the tester then uses one buffer as both input and
// output -- confirm in vunary-microkernel-tester.h.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X40, inplace) {
  TEST_REQUIRES_X86_AVX2;  // ISA guard; presumably bails out when AVX2 is unavailable.
  for (size_t batch_size = 1; batch_size <= 200; batch_size += 39) {
    VUnaryMicrokernelTester()
      .batch_size(batch_size)
      .inplace(true)
      .Test(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x40, xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
  }
}
7534 
// For prescale values 0.1 and 10, sweeps batch_size from 1 up to 200 in steps
// of 39. No other tester setting is overridden here.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X40, prescale) {
  TEST_REQUIRES_X86_AVX2;  // ISA guard; presumably bails out when AVX2 is unavailable.
  for (float prescale : std::vector<float>({0.1f, 10.0f})) {
    for (size_t batch_size = 1; batch_size <= 200; batch_size += 39) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .prescale(prescale)
        .Test(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x40, xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
    }
  }
}
7546 
// For alpha values 0.3 and 3, sweeps batch_size from 1 up to 200 in steps of
// 39. No other tester setting is overridden here.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X40, alpha) {
  TEST_REQUIRES_X86_AVX2;  // ISA guard; presumably bails out when AVX2 is unavailable.
  for (float alpha : std::vector<float>({0.3f, 3.0f})) {
    for (size_t batch_size = 1; batch_size <= 200; batch_size += 39) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .alpha(alpha)
        .Test(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x40, xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
    }
  }
}
7558 
// For beta values 0.3 and 3, sweeps batch_size from 1 up to 200 in steps of
// 39. No other tester setting is overridden here.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X40, beta) {
  TEST_REQUIRES_X86_AVX2;  // ISA guard; presumably bails out when AVX2 is unavailable.
  for (float beta : std::vector<float>({0.3f, 3.0f})) {
    for (size_t batch_size = 1; batch_size <= 200; batch_size += 39) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .beta(beta)
        .Test(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x40, xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
    }
  }
}
7570 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
7571 
7572 
7573 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the x48 kernel once with batch_size == 48, the element count named in
// the kernel's suffix.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X48, batch_eq_48) {
  TEST_REQUIRES_X86_AVX2;  // ISA guard; presumably bails out when AVX2 is unavailable.
  VUnaryMicrokernelTester()
    .batch_size(48)
    .Test(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x48, xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
}
7580 
// Sweeps batch_size over the multiples of 48 from 96 up to, but not
// including, 480.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X48, batch_div_48) {
  TEST_REQUIRES_X86_AVX2;  // ISA guard; presumably bails out when AVX2 is unavailable.
  for (size_t batch_size = 96; batch_size < 480; batch_size += 48) {
    VUnaryMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x48, xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
  }
}
7589 
// Sweeps batch_size over every value from 1 to 47, i.e. below the 48 named in
// the kernel's suffix.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X48, batch_lt_48) {
  TEST_REQUIRES_X86_AVX2;  // ISA guard; presumably bails out when AVX2 is unavailable.
  for (size_t batch_size = 1; batch_size < 48; batch_size++) {
    VUnaryMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x48, xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
  }
}
7598 
// Sweeps batch_size over every value from 49 to 95, i.e. strictly between the
// 48 named in the kernel's suffix and twice that.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X48, batch_gt_48) {
  TEST_REQUIRES_X86_AVX2;  // ISA guard; presumably bails out when AVX2 is unavailable.
  for (size_t batch_size = 49; batch_size < 96; batch_size++) {
    VUnaryMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x48, xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
  }
}
7607 
// Sweeps batch_size from 1 up to 240 in steps of 47 with the tester's inplace
// flag set.
// NOTE(review): presumably the tester then uses one buffer as both input and
// output -- confirm in vunary-microkernel-tester.h.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X48, inplace) {
  TEST_REQUIRES_X86_AVX2;  // ISA guard; presumably bails out when AVX2 is unavailable.
  for (size_t batch_size = 1; batch_size <= 240; batch_size += 47) {
    VUnaryMicrokernelTester()
      .batch_size(batch_size)
      .inplace(true)
      .Test(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x48, xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
  }
}
7617 
// For prescale values 0.1 and 10, sweeps batch_size from 1 up to 240 in steps
// of 47. No other tester setting is overridden here.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X48, prescale) {
  TEST_REQUIRES_X86_AVX2;  // ISA guard; presumably bails out when AVX2 is unavailable.
  for (float prescale : std::vector<float>({0.1f, 10.0f})) {
    for (size_t batch_size = 1; batch_size <= 240; batch_size += 47) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .prescale(prescale)
        .Test(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x48, xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
    }
  }
}
7629 
// For alpha values 0.3 and 3, sweeps batch_size from 1 up to 240 in steps of
// 47. No other tester setting is overridden here.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X48, alpha) {
  TEST_REQUIRES_X86_AVX2;  // ISA guard; presumably bails out when AVX2 is unavailable.
  for (float alpha : std::vector<float>({0.3f, 3.0f})) {
    for (size_t batch_size = 1; batch_size <= 240; batch_size += 47) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .alpha(alpha)
        .Test(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x48, xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
    }
  }
}
7641 
// For beta values 0.3 and 3, sweeps batch_size from 1 up to 240 in steps of
// 47. No other tester setting is overridden here.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X48, beta) {
  TEST_REQUIRES_X86_AVX2;  // ISA guard; presumably bails out when AVX2 is unavailable.
  for (float beta : std::vector<float>({0.3f, 3.0f})) {
    for (size_t batch_size = 1; batch_size <= 240; batch_size += 47) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .beta(beta)
        .Test(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x48, xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
    }
  }
}
7653 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
7654 
7655 
7656 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the x56 kernel once with batch_size == 56, the element count named in
// the kernel's suffix.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X56, batch_eq_56) {
  TEST_REQUIRES_X86_AVX2;  // ISA guard; presumably bails out when AVX2 is unavailable.
  VUnaryMicrokernelTester()
    .batch_size(56)
    .Test(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x56, xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
}
7663 
// Sweeps batch_size over the multiples of 56 from 112 up to, but not
// including, 560.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X56, batch_div_56) {
  TEST_REQUIRES_X86_AVX2;  // ISA guard; presumably bails out when AVX2 is unavailable.
  for (size_t batch_size = 112; batch_size < 560; batch_size += 56) {
    VUnaryMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x56, xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
  }
}
7672 
// Sweeps batch_size over every value from 1 to 55, i.e. below the 56 named in
// the kernel's suffix.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X56, batch_lt_56) {
  TEST_REQUIRES_X86_AVX2;  // ISA guard; presumably bails out when AVX2 is unavailable.
  for (size_t batch_size = 1; batch_size < 56; batch_size++) {
    VUnaryMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x56, xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
  }
}
7681 
// Sweeps batch_size over every value from 57 to 111, i.e. strictly between
// the 56 named in the kernel's suffix and twice that.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X56, batch_gt_56) {
  TEST_REQUIRES_X86_AVX2;  // ISA guard; presumably bails out when AVX2 is unavailable.
  for (size_t batch_size = 57; batch_size < 112; batch_size++) {
    VUnaryMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x56, xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
  }
}
7690 
// Sweeps batch_size from 1 up to 280 in steps of 55 with the tester's inplace
// flag set.
// NOTE(review): presumably the tester then uses one buffer as both input and
// output -- confirm in vunary-microkernel-tester.h.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X56, inplace) {
  TEST_REQUIRES_X86_AVX2;  // ISA guard; presumably bails out when AVX2 is unavailable.
  for (size_t batch_size = 1; batch_size <= 280; batch_size += 55) {
    VUnaryMicrokernelTester()
      .batch_size(batch_size)
      .inplace(true)
      .Test(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x56, xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
  }
}
7700 
// For prescale values 0.1 and 10, sweeps batch_size from 1 up to 280 in steps
// of 55. No other tester setting is overridden here.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X56, prescale) {
  TEST_REQUIRES_X86_AVX2;  // ISA guard; presumably bails out when AVX2 is unavailable.
  for (float prescale : std::vector<float>({0.1f, 10.0f})) {
    for (size_t batch_size = 1; batch_size <= 280; batch_size += 55) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .prescale(prescale)
        .Test(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x56, xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
    }
  }
}
7712 
// For alpha values 0.3 and 3, sweeps batch_size from 1 up to 280 in steps of
// 55. No other tester setting is overridden here.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X56, alpha) {
  TEST_REQUIRES_X86_AVX2;  // ISA guard; presumably bails out when AVX2 is unavailable.
  for (float alpha : std::vector<float>({0.3f, 3.0f})) {
    for (size_t batch_size = 1; batch_size <= 280; batch_size += 55) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .alpha(alpha)
        .Test(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x56, xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
    }
  }
}
7724 
// For beta values 0.3 and 3, sweeps batch_size from 1 up to 280 in steps of
// 55. No other tester setting is overridden here.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X56, beta) {
  TEST_REQUIRES_X86_AVX2;  // ISA guard; presumably bails out when AVX2 is unavailable.
  for (float beta : std::vector<float>({0.3f, 3.0f})) {
    for (size_t batch_size = 1; batch_size <= 280; batch_size += 55) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .beta(beta)
        .Test(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x56, xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
    }
  }
}
7736 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
7737 
7738 
7739 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the x64 kernel once with batch_size == 64, the element count named in
// the kernel's suffix.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X64, batch_eq_64) {
  TEST_REQUIRES_X86_AVX2;  // ISA guard; presumably bails out when AVX2 is unavailable.
  VUnaryMicrokernelTester()
    .batch_size(64)
    .Test(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x64, xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
}
7746 
// Sweeps batch_size over the multiples of 64 from 128 up to, but not
// including, 640.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X64, batch_div_64) {
  TEST_REQUIRES_X86_AVX2;  // ISA guard; presumably bails out when AVX2 is unavailable.
  for (size_t batch_size = 128; batch_size < 640; batch_size += 64) {
    VUnaryMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x64, xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
  }
}
7755 
// Sweeps batch_size over every value from 1 to 63, i.e. below the 64 named in
// the kernel's suffix.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X64, batch_lt_64) {
  TEST_REQUIRES_X86_AVX2;  // ISA guard; presumably bails out when AVX2 is unavailable.
  for (size_t batch_size = 1; batch_size < 64; batch_size++) {
    VUnaryMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x64, xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
  }
}
7764 
// Sweeps batch_size over every value from 65 to 127, i.e. strictly between
// the 64 named in the kernel's suffix and twice that.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X64, batch_gt_64) {
  TEST_REQUIRES_X86_AVX2;  // ISA guard; presumably bails out when AVX2 is unavailable.
  for (size_t batch_size = 65; batch_size < 128; batch_size++) {
    VUnaryMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x64, xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
  }
}
7773 
// Sweeps batch_size from 1 up to 320 in steps of 63 with the tester's inplace
// flag set.
// NOTE(review): presumably the tester then uses one buffer as both input and
// output -- confirm in vunary-microkernel-tester.h.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X64, inplace) {
  TEST_REQUIRES_X86_AVX2;  // ISA guard; presumably bails out when AVX2 is unavailable.
  for (size_t batch_size = 1; batch_size <= 320; batch_size += 63) {
    VUnaryMicrokernelTester()
      .batch_size(batch_size)
      .inplace(true)
      .Test(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x64, xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
  }
}
7783 
// For prescale values 0.1 and 10, sweeps batch_size from 1 up to 320 in steps
// of 63. No other tester setting is overridden here.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X64, prescale) {
  TEST_REQUIRES_X86_AVX2;  // ISA guard; presumably bails out when AVX2 is unavailable.
  for (float prescale : std::vector<float>({0.1f, 10.0f})) {
    for (size_t batch_size = 1; batch_size <= 320; batch_size += 63) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .prescale(prescale)
        .Test(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x64, xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
    }
  }
}
7795 
// For alpha values 0.3 and 3, sweeps batch_size from 1 up to 320 in steps of
// 63. No other tester setting is overridden here.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X64, alpha) {
  TEST_REQUIRES_X86_AVX2;  // ISA guard; presumably bails out when AVX2 is unavailable.
  for (float alpha : std::vector<float>({0.3f, 3.0f})) {
    for (size_t batch_size = 1; batch_size <= 320; batch_size += 63) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .alpha(alpha)
        .Test(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x64, xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
    }
  }
}
7807 
// For beta values 0.3 and 3, sweeps batch_size from 1 up to 320 in steps of
// 63. No other tester setting is overridden here.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X64, beta) {
  TEST_REQUIRES_X86_AVX2;  // ISA guard; presumably bails out when AVX2 is unavailable.
  for (float beta : std::vector<float>({0.3f, 3.0f})) {
    for (size_t batch_size = 1; batch_size <= 320; batch_size += 63) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .beta(beta)
        .Test(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x64, xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
    }
  }
}
7819 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
7820 
7821 
7822 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the x72 kernel once with batch_size == 72, the element count named in
// the kernel's suffix.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X72, batch_eq_72) {
  TEST_REQUIRES_X86_AVX2;  // ISA guard; presumably bails out when AVX2 is unavailable.
  VUnaryMicrokernelTester()
    .batch_size(72)
    .Test(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x72, xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
}
7829 
// Sweeps batch_size over the multiples of 72 from 144 up to, but not
// including, 720.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X72, batch_div_72) {
  TEST_REQUIRES_X86_AVX2;  // ISA guard; presumably bails out when AVX2 is unavailable.
  for (size_t batch_size = 144; batch_size < 720; batch_size += 72) {
    VUnaryMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x72, xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
  }
}
7838 
// Sweeps batch_size over every value from 1 to 71, i.e. below the 72 named in
// the kernel's suffix.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X72, batch_lt_72) {
  TEST_REQUIRES_X86_AVX2;  // ISA guard; presumably bails out when AVX2 is unavailable.
  for (size_t batch_size = 1; batch_size < 72; batch_size++) {
    VUnaryMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x72, xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
  }
}
7847 
// Sweeps batch_size over every value from 73 to 143, i.e. strictly between
// the 72 named in the kernel's suffix and twice that.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X72, batch_gt_72) {
  TEST_REQUIRES_X86_AVX2;  // ISA guard; presumably bails out when AVX2 is unavailable.
  for (size_t batch_size = 73; batch_size < 144; batch_size++) {
    VUnaryMicrokernelTester()
      .batch_size(batch_size)
      .Test(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x72, xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
  }
}
7856 
// Sweeps batch_size from 1 up to 360 in steps of 71 with the tester's inplace
// flag set.
// NOTE(review): presumably the tester then uses one buffer as both input and
// output -- confirm in vunary-microkernel-tester.h.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X72, inplace) {
  TEST_REQUIRES_X86_AVX2;  // ISA guard; presumably bails out when AVX2 is unavailable.
  for (size_t batch_size = 1; batch_size <= 360; batch_size += 71) {
    VUnaryMicrokernelTester()
      .batch_size(batch_size)
      .inplace(true)
      .Test(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x72, xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
  }
}
7866 
// For prescale values 0.1 and 10, sweeps batch_size from 1 up to 360 in steps
// of 71. No other tester setting is overridden here.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X72, prescale) {
  TEST_REQUIRES_X86_AVX2;  // ISA guard; presumably bails out when AVX2 is unavailable.
  for (float prescale : std::vector<float>({0.1f, 10.0f})) {
    for (size_t batch_size = 1; batch_size <= 360; batch_size += 71) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .prescale(prescale)
        .Test(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x72, xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
    }
  }
}
7878 
// For alpha values 0.3 and 3, sweeps batch_size from 1 up to 360 in steps of
// 71. No other tester setting is overridden here.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X72, alpha) {
  TEST_REQUIRES_X86_AVX2;  // ISA guard; presumably bails out when AVX2 is unavailable.
  for (float alpha : std::vector<float>({0.3f, 3.0f})) {
    for (size_t batch_size = 1; batch_size <= 360; batch_size += 71) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .alpha(alpha)
        .Test(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x72, xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
    }
  }
}
7890 
// For beta values 0.3 and 3, sweeps batch_size from 1 up to 360 in steps of
// 71. No other tester setting is overridden here.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X72, beta) {
  TEST_REQUIRES_X86_AVX2;  // ISA guard; presumably bails out when AVX2 is unavailable.
  for (float beta : std::vector<float>({0.3f, 3.0f})) {
    for (size_t batch_size = 1; batch_size <= 360; batch_size += 71) {
      VUnaryMicrokernelTester()
        .batch_size(batch_size)
        .beta(beta)
        .Test(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x72, xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
    }
  }
}
7902 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
7903 
7904 
7905 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the x80 kernel once with batch_size == 80, the element count named in
// the kernel's suffix.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X80, batch_eq_80) {
  TEST_REQUIRES_X86_AVX2;  // ISA guard; presumably bails out when AVX2 is unavailable.
  VUnaryMicrokernelTester()
    .batch_size(80)
    .Test(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x80, xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
}
7912 
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X80, batch_div_80) {
  // Batch sizes: multiples of 80 in [160, 800).
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 160; n < 800; n += 80) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n)
        .Test(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x80, xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
  }
}
7921 
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X80, batch_lt_80) {
  // Every batch size in [1, 80).
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n < 80; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n)
        .Test(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x80, xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
  }
}
7930 
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X80, batch_gt_80) {
  // Every batch size in [81, 160).
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 81; n < 160; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n)
        .Test(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x80, xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
  }
}
7939 
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X80, inplace) {
  // Batch sizes from 1 up to 400 in steps of 79, with inplace(true) set.
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 400; n += 79) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n).inplace(true)
        .Test(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x80, xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
  }
}
7949 
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X80, prescale) {
  // Crosses prescale values {0.1f, 10.0f} with batch sizes from 1 up to 400 in steps of 79.
  TEST_REQUIRES_X86_AVX2;
  const float prescale_values[] = {0.1f, 10.0f};
  for (const float value : prescale_values) {
    for (size_t n = 1; n <= 400; n += 79) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n).prescale(value)
          .Test(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x80, xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
    }
  }
}
7961 
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X80, alpha) {
  // Crosses alpha values {0.3f, 3.0f} with batch sizes from 1 up to 400 in steps of 79.
  TEST_REQUIRES_X86_AVX2;
  const float alpha_values[] = {0.3f, 3.0f};
  for (const float value : alpha_values) {
    for (size_t n = 1; n <= 400; n += 79) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n).alpha(value)
          .Test(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x80, xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
    }
  }
}
7973 
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X80, beta) {
  // Crosses beta values {0.3f, 3.0f} with batch sizes from 1 up to 400 in steps of 79.
  TEST_REQUIRES_X86_AVX2;
  const float beta_values[] = {0.3f, 3.0f};
  for (const float value : beta_values) {
    for (size_t n = 1; n <= 400; n += 79) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n).beta(value)
          .Test(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x80, xnn_init_f32_elu_avx2_rr1_lut16_p3_params);
    }
  }
}
7985 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
7986 
7987 
7988 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(F32_VELU__AVX2_RR1_P6_X8, batch_eq_8) {
  // Single run with batch_size set to 8.
  TEST_REQUIRES_X86_AVX2;
  VUnaryMicrokernelTester tester;
  tester.batch_size(8)
      .Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x8, xnn_init_f32_elu_avx2_rr1_p6_params);
}
7995 
TEST(F32_VELU__AVX2_RR1_P6_X8, batch_div_8) {
  // Batch sizes: multiples of 8 in [16, 80).
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 16; n < 80; n += 8) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n)
        .Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x8, xnn_init_f32_elu_avx2_rr1_p6_params);
  }
}
8004 
TEST(F32_VELU__AVX2_RR1_P6_X8, batch_lt_8) {
  // Every batch size in [1, 8).
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n < 8; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n)
        .Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x8, xnn_init_f32_elu_avx2_rr1_p6_params);
  }
}
8013 
TEST(F32_VELU__AVX2_RR1_P6_X8, batch_gt_8) {
  // Every batch size in [9, 16).
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 9; n < 16; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n)
        .Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x8, xnn_init_f32_elu_avx2_rr1_p6_params);
  }
}
8022 
TEST(F32_VELU__AVX2_RR1_P6_X8, inplace) {
  // Batch sizes from 1 up to 40 in steps of 7, with inplace(true) set.
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 40; n += 7) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n).inplace(true)
        .Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x8, xnn_init_f32_elu_avx2_rr1_p6_params);
  }
}
8032 
TEST(F32_VELU__AVX2_RR1_P6_X8, prescale) {
  // Crosses prescale values {0.1f, 10.0f} with batch sizes from 1 up to 40 in steps of 7.
  TEST_REQUIRES_X86_AVX2;
  const float prescale_values[] = {0.1f, 10.0f};
  for (const float value : prescale_values) {
    for (size_t n = 1; n <= 40; n += 7) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n).prescale(value)
          .Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x8, xnn_init_f32_elu_avx2_rr1_p6_params);
    }
  }
}
8044 
TEST(F32_VELU__AVX2_RR1_P6_X8, alpha) {
  // Crosses alpha values {0.3f, 3.0f} with batch sizes from 1 up to 40 in steps of 7.
  TEST_REQUIRES_X86_AVX2;
  const float alpha_values[] = {0.3f, 3.0f};
  for (const float value : alpha_values) {
    for (size_t n = 1; n <= 40; n += 7) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n).alpha(value)
          .Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x8, xnn_init_f32_elu_avx2_rr1_p6_params);
    }
  }
}
8056 
TEST(F32_VELU__AVX2_RR1_P6_X8, beta) {
  // Crosses beta values {0.3f, 3.0f} with batch sizes from 1 up to 40 in steps of 7.
  TEST_REQUIRES_X86_AVX2;
  const float beta_values[] = {0.3f, 3.0f};
  for (const float value : beta_values) {
    for (size_t n = 1; n <= 40; n += 7) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n).beta(value)
          .Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x8, xnn_init_f32_elu_avx2_rr1_p6_params);
    }
  }
}
8068 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
8069 
8070 
8071 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(F32_VELU__AVX2_RR1_P6_X16, batch_eq_16) {
  // Single run with batch_size set to 16.
  TEST_REQUIRES_X86_AVX2;
  VUnaryMicrokernelTester tester;
  tester.batch_size(16)
      .Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x16, xnn_init_f32_elu_avx2_rr1_p6_params);
}
8078 
TEST(F32_VELU__AVX2_RR1_P6_X16, batch_div_16) {
  // Batch sizes: multiples of 16 in [32, 160).
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 32; n < 160; n += 16) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n)
        .Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x16, xnn_init_f32_elu_avx2_rr1_p6_params);
  }
}
8087 
TEST(F32_VELU__AVX2_RR1_P6_X16, batch_lt_16) {
  // Every batch size in [1, 16).
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n < 16; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n)
        .Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x16, xnn_init_f32_elu_avx2_rr1_p6_params);
  }
}
8096 
TEST(F32_VELU__AVX2_RR1_P6_X16, batch_gt_16) {
  // Every batch size in [17, 32).
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 17; n < 32; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n)
        .Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x16, xnn_init_f32_elu_avx2_rr1_p6_params);
  }
}
8105 
TEST(F32_VELU__AVX2_RR1_P6_X16, inplace) {
  // Batch sizes from 1 up to 80 in steps of 15, with inplace(true) set.
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 80; n += 15) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n).inplace(true)
        .Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x16, xnn_init_f32_elu_avx2_rr1_p6_params);
  }
}
8115 
TEST(F32_VELU__AVX2_RR1_P6_X16, prescale) {
  // Crosses prescale values {0.1f, 10.0f} with batch sizes from 1 up to 80 in steps of 15.
  TEST_REQUIRES_X86_AVX2;
  const float prescale_values[] = {0.1f, 10.0f};
  for (const float value : prescale_values) {
    for (size_t n = 1; n <= 80; n += 15) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n).prescale(value)
          .Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x16, xnn_init_f32_elu_avx2_rr1_p6_params);
    }
  }
}
8127 
TEST(F32_VELU__AVX2_RR1_P6_X16, alpha) {
  // Crosses alpha values {0.3f, 3.0f} with batch sizes from 1 up to 80 in steps of 15.
  TEST_REQUIRES_X86_AVX2;
  const float alpha_values[] = {0.3f, 3.0f};
  for (const float value : alpha_values) {
    for (size_t n = 1; n <= 80; n += 15) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n).alpha(value)
          .Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x16, xnn_init_f32_elu_avx2_rr1_p6_params);
    }
  }
}
8139 
TEST(F32_VELU__AVX2_RR1_P6_X16, beta) {
  // Crosses beta values {0.3f, 3.0f} with batch sizes from 1 up to 80 in steps of 15.
  TEST_REQUIRES_X86_AVX2;
  const float beta_values[] = {0.3f, 3.0f};
  for (const float value : beta_values) {
    for (size_t n = 1; n <= 80; n += 15) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n).beta(value)
          .Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x16, xnn_init_f32_elu_avx2_rr1_p6_params);
    }
  }
}
8151 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
8152 
8153 
8154 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(F32_VELU__AVX2_RR1_P6_X24, batch_eq_24) {
  // Single run with batch_size set to 24.
  TEST_REQUIRES_X86_AVX2;
  VUnaryMicrokernelTester tester;
  tester.batch_size(24)
      .Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x24, xnn_init_f32_elu_avx2_rr1_p6_params);
}
8161 
TEST(F32_VELU__AVX2_RR1_P6_X24, batch_div_24) {
  // Batch sizes: multiples of 24 in [48, 240).
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 48; n < 240; n += 24) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n)
        .Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x24, xnn_init_f32_elu_avx2_rr1_p6_params);
  }
}
8170 
TEST(F32_VELU__AVX2_RR1_P6_X24, batch_lt_24) {
  // Every batch size in [1, 24).
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n < 24; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n)
        .Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x24, xnn_init_f32_elu_avx2_rr1_p6_params);
  }
}
8179 
TEST(F32_VELU__AVX2_RR1_P6_X24, batch_gt_24) {
  // Every batch size in [25, 48).
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 25; n < 48; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n)
        .Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x24, xnn_init_f32_elu_avx2_rr1_p6_params);
  }
}
8188 
TEST(F32_VELU__AVX2_RR1_P6_X24, inplace) {
  // Batch sizes from 1 up to 120 in steps of 23, with inplace(true) set.
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 120; n += 23) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n).inplace(true)
        .Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x24, xnn_init_f32_elu_avx2_rr1_p6_params);
  }
}
8198 
TEST(F32_VELU__AVX2_RR1_P6_X24, prescale) {
  // Crosses prescale values {0.1f, 10.0f} with batch sizes from 1 up to 120 in steps of 23.
  TEST_REQUIRES_X86_AVX2;
  const float prescale_values[] = {0.1f, 10.0f};
  for (const float value : prescale_values) {
    for (size_t n = 1; n <= 120; n += 23) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n).prescale(value)
          .Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x24, xnn_init_f32_elu_avx2_rr1_p6_params);
    }
  }
}
8210 
TEST(F32_VELU__AVX2_RR1_P6_X24, alpha) {
  // Crosses alpha values {0.3f, 3.0f} with batch sizes from 1 up to 120 in steps of 23.
  TEST_REQUIRES_X86_AVX2;
  const float alpha_values[] = {0.3f, 3.0f};
  for (const float value : alpha_values) {
    for (size_t n = 1; n <= 120; n += 23) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n).alpha(value)
          .Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x24, xnn_init_f32_elu_avx2_rr1_p6_params);
    }
  }
}
8222 
TEST(F32_VELU__AVX2_RR1_P6_X24, beta) {
  // Crosses beta values {0.3f, 3.0f} with batch sizes from 1 up to 120 in steps of 23.
  TEST_REQUIRES_X86_AVX2;
  const float beta_values[] = {0.3f, 3.0f};
  for (const float value : beta_values) {
    for (size_t n = 1; n <= 120; n += 23) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n).beta(value)
          .Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x24, xnn_init_f32_elu_avx2_rr1_p6_params);
    }
  }
}
8234 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
8235 
8236 
8237 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(F32_VELU__AVX2_RR1_P6_X32, batch_eq_32) {
  // Single run with batch_size set to 32.
  TEST_REQUIRES_X86_AVX2;
  VUnaryMicrokernelTester tester;
  tester.batch_size(32)
      .Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x32, xnn_init_f32_elu_avx2_rr1_p6_params);
}
8244 
TEST(F32_VELU__AVX2_RR1_P6_X32, batch_div_32) {
  // Batch sizes: multiples of 32 in [64, 320).
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 64; n < 320; n += 32) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n)
        .Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x32, xnn_init_f32_elu_avx2_rr1_p6_params);
  }
}
8253 
TEST(F32_VELU__AVX2_RR1_P6_X32, batch_lt_32) {
  // Every batch size in [1, 32).
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n < 32; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n)
        .Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x32, xnn_init_f32_elu_avx2_rr1_p6_params);
  }
}
8262 
TEST(F32_VELU__AVX2_RR1_P6_X32, batch_gt_32) {
  // Every batch size in [33, 64).
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 33; n < 64; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n)
        .Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x32, xnn_init_f32_elu_avx2_rr1_p6_params);
  }
}
8271 
TEST(F32_VELU__AVX2_RR1_P6_X32, inplace) {
  // Batch sizes from 1 up to 160 in steps of 31, with inplace(true) set.
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 160; n += 31) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n).inplace(true)
        .Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x32, xnn_init_f32_elu_avx2_rr1_p6_params);
  }
}
8281 
TEST(F32_VELU__AVX2_RR1_P6_X32, prescale) {
  // Crosses prescale values {0.1f, 10.0f} with batch sizes from 1 up to 160 in steps of 31.
  TEST_REQUIRES_X86_AVX2;
  const float prescale_values[] = {0.1f, 10.0f};
  for (const float value : prescale_values) {
    for (size_t n = 1; n <= 160; n += 31) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n).prescale(value)
          .Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x32, xnn_init_f32_elu_avx2_rr1_p6_params);
    }
  }
}
8293 
TEST(F32_VELU__AVX2_RR1_P6_X32, alpha) {
  // Crosses alpha values {0.3f, 3.0f} with batch sizes from 1 up to 160 in steps of 31.
  TEST_REQUIRES_X86_AVX2;
  const float alpha_values[] = {0.3f, 3.0f};
  for (const float value : alpha_values) {
    for (size_t n = 1; n <= 160; n += 31) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n).alpha(value)
          .Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x32, xnn_init_f32_elu_avx2_rr1_p6_params);
    }
  }
}
8305 
TEST(F32_VELU__AVX2_RR1_P6_X32, beta) {
  // Crosses beta values {0.3f, 3.0f} with batch sizes from 1 up to 160 in steps of 31.
  TEST_REQUIRES_X86_AVX2;
  const float beta_values[] = {0.3f, 3.0f};
  for (const float value : beta_values) {
    for (size_t n = 1; n <= 160; n += 31) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n).beta(value)
          .Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x32, xnn_init_f32_elu_avx2_rr1_p6_params);
    }
  }
}
8317 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
8318 
8319 
8320 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(F32_VELU__AVX2_RR1_P6_X40, batch_eq_40) {
  // Single run with batch_size set to 40.
  TEST_REQUIRES_X86_AVX2;
  VUnaryMicrokernelTester tester;
  tester.batch_size(40)
      .Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x40, xnn_init_f32_elu_avx2_rr1_p6_params);
}
8327 
TEST(F32_VELU__AVX2_RR1_P6_X40, batch_div_40) {
  // Batch sizes: multiples of 40 in [80, 400).
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 80; n < 400; n += 40) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n)
        .Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x40, xnn_init_f32_elu_avx2_rr1_p6_params);
  }
}
8336 
TEST(F32_VELU__AVX2_RR1_P6_X40, batch_lt_40) {
  // Every batch size in [1, 40).
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n < 40; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n)
        .Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x40, xnn_init_f32_elu_avx2_rr1_p6_params);
  }
}
8345 
TEST(F32_VELU__AVX2_RR1_P6_X40, batch_gt_40) {
  // Every batch size in [41, 80).
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 41; n < 80; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n)
        .Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x40, xnn_init_f32_elu_avx2_rr1_p6_params);
  }
}
8354 
TEST(F32_VELU__AVX2_RR1_P6_X40, inplace) {
  // Batch sizes from 1 up to 200 in steps of 39, with inplace(true) set.
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 200; n += 39) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n).inplace(true)
        .Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x40, xnn_init_f32_elu_avx2_rr1_p6_params);
  }
}
8364 
TEST(F32_VELU__AVX2_RR1_P6_X40, prescale) {
  // Crosses prescale values {0.1f, 10.0f} with batch sizes from 1 up to 200 in steps of 39.
  TEST_REQUIRES_X86_AVX2;
  const float prescale_values[] = {0.1f, 10.0f};
  for (const float value : prescale_values) {
    for (size_t n = 1; n <= 200; n += 39) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n).prescale(value)
          .Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x40, xnn_init_f32_elu_avx2_rr1_p6_params);
    }
  }
}
8376 
TEST(F32_VELU__AVX2_RR1_P6_X40, alpha) {
  // Crosses alpha values {0.3f, 3.0f} with batch sizes from 1 up to 200 in steps of 39.
  TEST_REQUIRES_X86_AVX2;
  const float alpha_values[] = {0.3f, 3.0f};
  for (const float value : alpha_values) {
    for (size_t n = 1; n <= 200; n += 39) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n).alpha(value)
          .Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x40, xnn_init_f32_elu_avx2_rr1_p6_params);
    }
  }
}
8388 
TEST(F32_VELU__AVX2_RR1_P6_X40, beta) {
  // Crosses beta values {0.3f, 3.0f} with batch sizes from 1 up to 200 in steps of 39.
  TEST_REQUIRES_X86_AVX2;
  const float beta_values[] = {0.3f, 3.0f};
  for (const float value : beta_values) {
    for (size_t n = 1; n <= 200; n += 39) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n).beta(value)
          .Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x40, xnn_init_f32_elu_avx2_rr1_p6_params);
    }
  }
}
8400 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
8401 
8402 
8403 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(F32_VELU__AVX2_RR1_P6_X48, batch_eq_48) {
  // Single run with batch_size set to 48.
  TEST_REQUIRES_X86_AVX2;
  VUnaryMicrokernelTester tester;
  tester.batch_size(48)
      .Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x48, xnn_init_f32_elu_avx2_rr1_p6_params);
}
8410 
TEST(F32_VELU__AVX2_RR1_P6_X48, batch_div_48) {
  // Batch sizes: multiples of 48 in [96, 480).
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 96; n < 480; n += 48) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n)
        .Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x48, xnn_init_f32_elu_avx2_rr1_p6_params);
  }
}
8419 
TEST(F32_VELU__AVX2_RR1_P6_X48, batch_lt_48) {
  // Every batch size in [1, 48).
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n < 48; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n)
        .Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x48, xnn_init_f32_elu_avx2_rr1_p6_params);
  }
}
8428 
TEST(F32_VELU__AVX2_RR1_P6_X48, batch_gt_48) {
  // Every batch size in [49, 96).
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 49; n < 96; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n)
        .Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x48, xnn_init_f32_elu_avx2_rr1_p6_params);
  }
}
8437 
TEST(F32_VELU__AVX2_RR1_P6_X48, inplace) {
  // Batch sizes from 1 up to 240 in steps of 47, with inplace(true) set.
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 240; n += 47) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n).inplace(true)
        .Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x48, xnn_init_f32_elu_avx2_rr1_p6_params);
  }
}
8447 
TEST(F32_VELU__AVX2_RR1_P6_X48, prescale) {
  // Crosses prescale values {0.1f, 10.0f} with batch sizes from 1 up to 240 in steps of 47.
  TEST_REQUIRES_X86_AVX2;
  const float prescale_values[] = {0.1f, 10.0f};
  for (const float value : prescale_values) {
    for (size_t n = 1; n <= 240; n += 47) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n).prescale(value)
          .Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x48, xnn_init_f32_elu_avx2_rr1_p6_params);
    }
  }
}
8459 
TEST(F32_VELU__AVX2_RR1_P6_X48, alpha) {
  // Crosses alpha values {0.3f, 3.0f} with batch sizes from 1 up to 240 in steps of 47.
  TEST_REQUIRES_X86_AVX2;
  const float alpha_values[] = {0.3f, 3.0f};
  for (const float value : alpha_values) {
    for (size_t n = 1; n <= 240; n += 47) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n).alpha(value)
          .Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x48, xnn_init_f32_elu_avx2_rr1_p6_params);
    }
  }
}
8471 
TEST(F32_VELU__AVX2_RR1_P6_X48, beta) {
  // Crosses beta values {0.3f, 3.0f} with batch sizes from 1 up to 240 in steps of 47.
  TEST_REQUIRES_X86_AVX2;
  const float beta_values[] = {0.3f, 3.0f};
  for (const float value : beta_values) {
    for (size_t n = 1; n <= 240; n += 47) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n).beta(value)
          .Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x48, xnn_init_f32_elu_avx2_rr1_p6_params);
    }
  }
}
8483 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
8484 
8485 
8486 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(F32_VELU__AVX2_RR1_P6_X56, batch_eq_56) {
  // Single run with batch_size set to 56.
  TEST_REQUIRES_X86_AVX2;
  VUnaryMicrokernelTester tester;
  tester.batch_size(56)
      .Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x56, xnn_init_f32_elu_avx2_rr1_p6_params);
}
8493 
TEST(F32_VELU__AVX2_RR1_P6_X56, batch_div_56) {
  // Batch sizes: multiples of 56 in [112, 560).
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 112; n < 560; n += 56) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n)
        .Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x56, xnn_init_f32_elu_avx2_rr1_p6_params);
  }
}
8502 
TEST(F32_VELU__AVX2_RR1_P6_X56, batch_lt_56) {
  // Every batch size in [1, 56).
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n < 56; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n)
        .Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x56, xnn_init_f32_elu_avx2_rr1_p6_params);
  }
}
8511 
TEST(F32_VELU__AVX2_RR1_P6_X56, batch_gt_56) {
  // Every batch size in [57, 112).
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 57; n < 112; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n)
        .Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x56, xnn_init_f32_elu_avx2_rr1_p6_params);
  }
}
8520 
TEST(F32_VELU__AVX2_RR1_P6_X56, inplace) {
  // Batch sizes from 1 up to 280 in steps of 55, with inplace(true) set.
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 280; n += 55) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n).inplace(true)
        .Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x56, xnn_init_f32_elu_avx2_rr1_p6_params);
  }
}
8530 
TEST(F32_VELU__AVX2_RR1_P6_X56, prescale) {
  // Crosses prescale values {0.1f, 10.0f} with batch sizes from 1 up to 280 in steps of 55.
  TEST_REQUIRES_X86_AVX2;
  const float prescale_values[] = {0.1f, 10.0f};
  for (const float value : prescale_values) {
    for (size_t n = 1; n <= 280; n += 55) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n).prescale(value)
          .Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x56, xnn_init_f32_elu_avx2_rr1_p6_params);
    }
  }
}
8542 
TEST(F32_VELU__AVX2_RR1_P6_X56, alpha) {
  // Crosses alpha values {0.3f, 3.0f} with batch sizes from 1 up to 280 in steps of 55.
  TEST_REQUIRES_X86_AVX2;
  const float alpha_values[] = {0.3f, 3.0f};
  for (const float value : alpha_values) {
    for (size_t n = 1; n <= 280; n += 55) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n).alpha(value)
          .Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x56, xnn_init_f32_elu_avx2_rr1_p6_params);
    }
  }
}
8554 
TEST(F32_VELU__AVX2_RR1_P6_X56, beta) {
  // Crosses beta values {0.3f, 3.0f} with batch sizes from 1 up to 280 in steps of 55.
  TEST_REQUIRES_X86_AVX2;
  const float beta_values[] = {0.3f, 3.0f};
  for (const float value : beta_values) {
    for (size_t n = 1; n <= 280; n += 55) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n).beta(value)
          .Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x56, xnn_init_f32_elu_avx2_rr1_p6_params);
    }
  }
}
8566 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
8567 
8568 
8569 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(F32_VELU__AVX2_RR1_P6_X64, batch_eq_64) {
  // Single run with batch_size set to 64.
  TEST_REQUIRES_X86_AVX2;
  VUnaryMicrokernelTester tester;
  tester.batch_size(64)
      .Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x64, xnn_init_f32_elu_avx2_rr1_p6_params);
}
8576 
TEST(F32_VELU__AVX2_RR1_P6_X64, batch_div_64) {
  // Batch sizes: multiples of 64 in [128, 640).
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 128; n < 640; n += 64) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n)
        .Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x64, xnn_init_f32_elu_avx2_rr1_p6_params);
  }
}
8585 
TEST(F32_VELU__AVX2_RR1_P6_X64, batch_lt_64) {
  // Every batch size in [1, 64).
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n < 64; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n)
        .Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x64, xnn_init_f32_elu_avx2_rr1_p6_params);
  }
}
8594 
TEST(F32_VELU__AVX2_RR1_P6_X64, batch_gt_64) {
  // Every batch size in [65, 128).
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 65; n < 128; ++n) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n)
        .Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x64, xnn_init_f32_elu_avx2_rr1_p6_params);
  }
}
8603 
TEST(F32_VELU__AVX2_RR1_P6_X64, inplace) {
  // Batch sizes from 1 up to 320 in steps of 63, with inplace(true) set.
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 320; n += 63) {
    VUnaryMicrokernelTester tester;
    tester.batch_size(n).inplace(true)
        .Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x64, xnn_init_f32_elu_avx2_rr1_p6_params);
  }
}
8613 
TEST(F32_VELU__AVX2_RR1_P6_X64, prescale) {
  // Crosses prescale values {0.1f, 10.0f} with batch sizes from 1 up to 320 in steps of 63.
  TEST_REQUIRES_X86_AVX2;
  const float prescale_values[] = {0.1f, 10.0f};
  for (const float value : prescale_values) {
    for (size_t n = 1; n <= 320; n += 63) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n).prescale(value)
          .Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x64, xnn_init_f32_elu_avx2_rr1_p6_params);
    }
  }
}
8625 
TEST(F32_VELU__AVX2_RR1_P6_X64, alpha) {
  // Crosses alpha values {0.3f, 3.0f} with batch sizes from 1 up to 320 in steps of 63.
  TEST_REQUIRES_X86_AVX2;
  const float alpha_values[] = {0.3f, 3.0f};
  for (const float value : alpha_values) {
    for (size_t n = 1; n <= 320; n += 63) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n).alpha(value)
          .Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x64, xnn_init_f32_elu_avx2_rr1_p6_params);
    }
  }
}
8637 
TEST(F32_VELU__AVX2_RR1_P6_X64, beta) {
  // Crosses beta values {0.3f, 3.0f} with batch sizes from 1 up to 320 in steps of 63.
  TEST_REQUIRES_X86_AVX2;
  const float beta_values[] = {0.3f, 3.0f};
  for (const float value : beta_values) {
    for (size_t n = 1; n <= 320; n += 63) {
      VUnaryMicrokernelTester tester;
      tester.batch_size(n).beta(value)
          .Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x64, xnn_init_f32_elu_avx2_rr1_p6_params);
    }
  }
}
8649 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
8650 
8651 
8652 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(F32_VELU__AVX2_RR1_P6_X72, batch_eq_72) {
  // Single run with batch_size set to 72.
  TEST_REQUIRES_X86_AVX2;
  VUnaryMicrokernelTester tester;
  tester.batch_size(72)
      .Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x72, xnn_init_f32_elu_avx2_rr1_p6_params);
}
8659 
// Covers batch sizes that are whole multiples of 72, from 2*72 (144) through 9*72 (648).
TEST(F32_VELU__AVX2_RR1_P6_X72, batch_div_72) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t multiple = 2; multiple < 10; ++multiple) {
    VUnaryMicrokernelTester()
      .batch_size(multiple * 72)
      .Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x72, xnn_init_f32_elu_avx2_rr1_p6_params);
  }
}
8668 
// Covers every batch size smaller than 72 (1 through 71).
TEST(F32_VELU__AVX2_RR1_P6_X72, batch_lt_72) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n < 72; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx2_rr1_p6_x72, xnn_init_f32_elu_avx2_rr1_p6_params);
  }
}
8677 
// Covers every batch size strictly between 72 and 144 (73 through 143).
TEST(F32_VELU__AVX2_RR1_P6_X72, batch_gt_72) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 73; n < 144; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx2_rr1_p6_x72, xnn_init_f32_elu_avx2_rr1_p6_params);
  }
}
8686 
// Runs with inplace(true) for batch sizes from 1 to 360 in steps of 71.
TEST(F32_VELU__AVX2_RR1_P6_X72, inplace) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 360; n += 71) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_velu_ukernel__avx2_rr1_p6_x72, xnn_init_f32_elu_avx2_rr1_p6_params);
  }
}
8696 
// Sweeps prescale over {0.1, 10.0}; for each value, batch sizes run from 1 to 360 in steps of 71.
TEST(F32_VELU__AVX2_RR1_P6_X72, prescale) {
  TEST_REQUIRES_X86_AVX2;
  const std::vector<float> prescale_values{0.1f, 10.0f};
  for (const float prescale : prescale_values) {
    for (size_t n = 1; n <= 360; n += 71) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .prescale(prescale)
        .Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x72, xnn_init_f32_elu_avx2_rr1_p6_params);
    }
  }
}
8708 
// Sweeps alpha over {0.3, 3.0}; for each value, batch sizes run from 1 to 360 in steps of 71.
TEST(F32_VELU__AVX2_RR1_P6_X72, alpha) {
  TEST_REQUIRES_X86_AVX2;
  const std::vector<float> alpha_values{0.3f, 3.0f};
  for (const float alpha : alpha_values) {
    for (size_t n = 1; n <= 360; n += 71) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .alpha(alpha)
        .Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x72, xnn_init_f32_elu_avx2_rr1_p6_params);
    }
  }
}
8720 
// Sweeps beta over {0.3, 3.0}; for each value, batch sizes run from 1 to 360 in steps of 71.
TEST(F32_VELU__AVX2_RR1_P6_X72, beta) {
  TEST_REQUIRES_X86_AVX2;
  const std::vector<float> beta_values{0.3f, 3.0f};
  for (const float beta : beta_values) {
    for (size_t n = 1; n <= 360; n += 71) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .beta(beta)
        .Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x72, xnn_init_f32_elu_avx2_rr1_p6_params);
    }
  }
}
8732 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
8733 
8734 
8735 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the x80 kernel once on a batch of exactly 80 elements.
TEST(F32_VELU__AVX2_RR1_P6_X80, batch_eq_80) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_velu_ukernel__avx2_rr1_p6_x80;
  const auto init_params = xnn_init_f32_elu_avx2_rr1_p6_params;
  VUnaryMicrokernelTester().batch_size(80).Test(ukernel, init_params);
}
8742 
// Covers batch sizes that are whole multiples of 80, from 2*80 (160) through 9*80 (720).
TEST(F32_VELU__AVX2_RR1_P6_X80, batch_div_80) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t multiple = 2; multiple < 10; ++multiple) {
    VUnaryMicrokernelTester()
      .batch_size(multiple * 80)
      .Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x80, xnn_init_f32_elu_avx2_rr1_p6_params);
  }
}
8751 
// Covers every batch size smaller than 80 (1 through 79).
TEST(F32_VELU__AVX2_RR1_P6_X80, batch_lt_80) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n < 80; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx2_rr1_p6_x80, xnn_init_f32_elu_avx2_rr1_p6_params);
  }
}
8760 
// Covers every batch size strictly between 80 and 160 (81 through 159).
TEST(F32_VELU__AVX2_RR1_P6_X80, batch_gt_80) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 81; n < 160; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx2_rr1_p6_x80, xnn_init_f32_elu_avx2_rr1_p6_params);
  }
}
8769 
// Runs with inplace(true) for batch sizes from 1 to 400 in steps of 79.
TEST(F32_VELU__AVX2_RR1_P6_X80, inplace) {
  TEST_REQUIRES_X86_AVX2;
  for (size_t n = 1; n <= 400; n += 79) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_velu_ukernel__avx2_rr1_p6_x80, xnn_init_f32_elu_avx2_rr1_p6_params);
  }
}
8779 
// Sweeps prescale over {0.1, 10.0}; for each value, batch sizes run from 1 to 400 in steps of 79.
TEST(F32_VELU__AVX2_RR1_P6_X80, prescale) {
  TEST_REQUIRES_X86_AVX2;
  const std::vector<float> prescale_values{0.1f, 10.0f};
  for (const float prescale : prescale_values) {
    for (size_t n = 1; n <= 400; n += 79) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .prescale(prescale)
        .Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x80, xnn_init_f32_elu_avx2_rr1_p6_params);
    }
  }
}
8791 
// Sweeps alpha over {0.3, 3.0}; for each value, batch sizes run from 1 to 400 in steps of 79.
TEST(F32_VELU__AVX2_RR1_P6_X80, alpha) {
  TEST_REQUIRES_X86_AVX2;
  const std::vector<float> alpha_values{0.3f, 3.0f};
  for (const float alpha : alpha_values) {
    for (size_t n = 1; n <= 400; n += 79) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .alpha(alpha)
        .Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x80, xnn_init_f32_elu_avx2_rr1_p6_params);
    }
  }
}
8803 
// Sweeps beta over {0.3, 3.0}; for each value, batch sizes run from 1 to 400 in steps of 79.
TEST(F32_VELU__AVX2_RR1_P6_X80, beta) {
  TEST_REQUIRES_X86_AVX2;
  const std::vector<float> beta_values{0.3f, 3.0f};
  for (const float beta : beta_values) {
    for (size_t n = 1; n <= 400; n += 79) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .beta(beta)
        .Test(xnn_f32_velu_ukernel__avx2_rr1_p6_x80, xnn_init_f32_elu_avx2_rr1_p6_params);
    }
  }
}
8815 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
8816 
8817 
8818 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the x16 kernel once on a batch of exactly 16 elements.
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X16, batch_eq_16) {
  TEST_REQUIRES_X86_AVX512F;
  const auto ukernel = xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x16;
  const auto init_params = xnn_init_f32_elu_avx512_rr1_lut16_p3_params;
  VUnaryMicrokernelTester().batch_size(16).Test(ukernel, init_params);
}
8825 
// Covers batch sizes that are whole multiples of 16, from 2*16 (32) through 9*16 (144).
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X16, batch_div_16) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t multiple = 2; multiple < 10; ++multiple) {
    VUnaryMicrokernelTester()
      .batch_size(multiple * 16)
      .Test(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x16, xnn_init_f32_elu_avx512_rr1_lut16_p3_params);
  }
}
8834 
// Covers every batch size smaller than 16 (1 through 15).
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X16, batch_lt_16) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 1; n < 16; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x16,
        xnn_init_f32_elu_avx512_rr1_lut16_p3_params);
  }
}
8843 
// Covers every batch size strictly between 16 and 32 (17 through 31).
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X16, batch_gt_16) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 17; n < 32; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x16,
        xnn_init_f32_elu_avx512_rr1_lut16_p3_params);
  }
}
8852 
// Runs with inplace(true) for batch sizes from 1 to 80 in steps of 15.
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X16, inplace) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 1; n <= 80; n += 15) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x16,
        xnn_init_f32_elu_avx512_rr1_lut16_p3_params);
  }
}
8862 
// Sweeps prescale over {0.1, 10.0}; for each value, batch sizes run from 1 to 80 in steps of 15.
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X16, prescale) {
  TEST_REQUIRES_X86_AVX512F;
  const std::vector<float> prescale_values{0.1f, 10.0f};
  for (const float prescale : prescale_values) {
    for (size_t n = 1; n <= 80; n += 15) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .prescale(prescale)
        .Test(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x16, xnn_init_f32_elu_avx512_rr1_lut16_p3_params);
    }
  }
}
8874 
// Sweeps alpha over {0.3, 3.0}; for each value, batch sizes run from 1 to 80 in steps of 15.
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X16, alpha) {
  TEST_REQUIRES_X86_AVX512F;
  const std::vector<float> alpha_values{0.3f, 3.0f};
  for (const float alpha : alpha_values) {
    for (size_t n = 1; n <= 80; n += 15) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .alpha(alpha)
        .Test(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x16, xnn_init_f32_elu_avx512_rr1_lut16_p3_params);
    }
  }
}
8886 
// Sweeps beta over {0.3, 3.0}; for each value, batch sizes run from 1 to 80 in steps of 15.
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X16, beta) {
  TEST_REQUIRES_X86_AVX512F;
  const std::vector<float> beta_values{0.3f, 3.0f};
  for (const float beta : beta_values) {
    for (size_t n = 1; n <= 80; n += 15) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .beta(beta)
        .Test(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x16, xnn_init_f32_elu_avx512_rr1_lut16_p3_params);
    }
  }
}
8898 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
8899 
8900 
8901 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the x32 kernel once on a batch of exactly 32 elements.
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X32, batch_eq_32) {
  TEST_REQUIRES_X86_AVX512F;
  const auto ukernel = xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x32;
  const auto init_params = xnn_init_f32_elu_avx512_rr1_lut16_p3_params;
  VUnaryMicrokernelTester().batch_size(32).Test(ukernel, init_params);
}
8908 
// Covers batch sizes that are whole multiples of 32, from 2*32 (64) through 9*32 (288).
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X32, batch_div_32) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t multiple = 2; multiple < 10; ++multiple) {
    VUnaryMicrokernelTester()
      .batch_size(multiple * 32)
      .Test(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x32, xnn_init_f32_elu_avx512_rr1_lut16_p3_params);
  }
}
8917 
// Covers every batch size smaller than 32 (1 through 31).
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X32, batch_lt_32) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 1; n < 32; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x32,
        xnn_init_f32_elu_avx512_rr1_lut16_p3_params);
  }
}
8926 
// Covers every batch size strictly between 32 and 64 (33 through 63).
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X32, batch_gt_32) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 33; n < 64; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x32,
        xnn_init_f32_elu_avx512_rr1_lut16_p3_params);
  }
}
8935 
// Runs with inplace(true) for batch sizes from 1 to 160 in steps of 31.
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X32, inplace) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 1; n <= 160; n += 31) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x32,
        xnn_init_f32_elu_avx512_rr1_lut16_p3_params);
  }
}
8945 
// Sweeps prescale over {0.1, 10.0}; for each value, batch sizes run from 1 to 160 in steps of 31.
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X32, prescale) {
  TEST_REQUIRES_X86_AVX512F;
  const std::vector<float> prescale_values{0.1f, 10.0f};
  for (const float prescale : prescale_values) {
    for (size_t n = 1; n <= 160; n += 31) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .prescale(prescale)
        .Test(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x32, xnn_init_f32_elu_avx512_rr1_lut16_p3_params);
    }
  }
}
8957 
// Sweeps alpha over {0.3, 3.0}; for each value, batch sizes run from 1 to 160 in steps of 31.
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X32, alpha) {
  TEST_REQUIRES_X86_AVX512F;
  const std::vector<float> alpha_values{0.3f, 3.0f};
  for (const float alpha : alpha_values) {
    for (size_t n = 1; n <= 160; n += 31) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .alpha(alpha)
        .Test(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x32, xnn_init_f32_elu_avx512_rr1_lut16_p3_params);
    }
  }
}
8969 
// Sweeps beta over {0.3, 3.0}; for each value, batch sizes run from 1 to 160 in steps of 31.
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X32, beta) {
  TEST_REQUIRES_X86_AVX512F;
  const std::vector<float> beta_values{0.3f, 3.0f};
  for (const float beta : beta_values) {
    for (size_t n = 1; n <= 160; n += 31) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .beta(beta)
        .Test(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x32, xnn_init_f32_elu_avx512_rr1_lut16_p3_params);
    }
  }
}
8981 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
8982 
8983 
8984 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the x48 kernel once on a batch of exactly 48 elements.
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X48, batch_eq_48) {
  TEST_REQUIRES_X86_AVX512F;
  const auto ukernel = xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x48;
  const auto init_params = xnn_init_f32_elu_avx512_rr1_lut16_p3_params;
  VUnaryMicrokernelTester().batch_size(48).Test(ukernel, init_params);
}
8991 
// Covers batch sizes that are whole multiples of 48, from 2*48 (96) through 9*48 (432).
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X48, batch_div_48) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t multiple = 2; multiple < 10; ++multiple) {
    VUnaryMicrokernelTester()
      .batch_size(multiple * 48)
      .Test(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x48, xnn_init_f32_elu_avx512_rr1_lut16_p3_params);
  }
}
9000 
// Covers every batch size smaller than 48 (1 through 47).
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X48, batch_lt_48) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 1; n < 48; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x48,
        xnn_init_f32_elu_avx512_rr1_lut16_p3_params);
  }
}
9009 
// Covers every batch size strictly between 48 and 96 (49 through 95).
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X48, batch_gt_48) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 49; n < 96; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x48,
        xnn_init_f32_elu_avx512_rr1_lut16_p3_params);
  }
}
9018 
// Runs with inplace(true) for batch sizes from 1 to 240 in steps of 47.
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X48, inplace) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 1; n <= 240; n += 47) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x48,
        xnn_init_f32_elu_avx512_rr1_lut16_p3_params);
  }
}
9028 
// Sweeps prescale over {0.1, 10.0}; for each value, batch sizes run from 1 to 240 in steps of 47.
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X48, prescale) {
  TEST_REQUIRES_X86_AVX512F;
  const std::vector<float> prescale_values{0.1f, 10.0f};
  for (const float prescale : prescale_values) {
    for (size_t n = 1; n <= 240; n += 47) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .prescale(prescale)
        .Test(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x48, xnn_init_f32_elu_avx512_rr1_lut16_p3_params);
    }
  }
}
9040 
// Sweeps alpha over {0.3, 3.0}; for each value, batch sizes run from 1 to 240 in steps of 47.
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X48, alpha) {
  TEST_REQUIRES_X86_AVX512F;
  const std::vector<float> alpha_values{0.3f, 3.0f};
  for (const float alpha : alpha_values) {
    for (size_t n = 1; n <= 240; n += 47) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .alpha(alpha)
        .Test(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x48, xnn_init_f32_elu_avx512_rr1_lut16_p3_params);
    }
  }
}
9052 
// Sweeps beta over {0.3, 3.0}; for each value, batch sizes run from 1 to 240 in steps of 47.
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X48, beta) {
  TEST_REQUIRES_X86_AVX512F;
  const std::vector<float> beta_values{0.3f, 3.0f};
  for (const float beta : beta_values) {
    for (size_t n = 1; n <= 240; n += 47) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .beta(beta)
        .Test(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x48, xnn_init_f32_elu_avx512_rr1_lut16_p3_params);
    }
  }
}
9064 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
9065 
9066 
9067 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the x64 kernel once on a batch of exactly 64 elements.
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X64, batch_eq_64) {
  TEST_REQUIRES_X86_AVX512F;
  const auto ukernel = xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x64;
  const auto init_params = xnn_init_f32_elu_avx512_rr1_lut16_p3_params;
  VUnaryMicrokernelTester().batch_size(64).Test(ukernel, init_params);
}
9074 
// Covers batch sizes that are whole multiples of 64, from 2*64 (128) through 9*64 (576).
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X64, batch_div_64) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t multiple = 2; multiple < 10; ++multiple) {
    VUnaryMicrokernelTester()
      .batch_size(multiple * 64)
      .Test(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x64, xnn_init_f32_elu_avx512_rr1_lut16_p3_params);
  }
}
9083 
// Covers every batch size smaller than 64 (1 through 63).
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X64, batch_lt_64) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 1; n < 64; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x64,
        xnn_init_f32_elu_avx512_rr1_lut16_p3_params);
  }
}
9092 
// Covers every batch size strictly between 64 and 128 (65 through 127).
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X64, batch_gt_64) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 65; n < 128; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x64,
        xnn_init_f32_elu_avx512_rr1_lut16_p3_params);
  }
}
9101 
// Runs with inplace(true) for batch sizes from 1 to 320 in steps of 63.
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X64, inplace) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 1; n <= 320; n += 63) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x64,
        xnn_init_f32_elu_avx512_rr1_lut16_p3_params);
  }
}
9111 
// Sweeps prescale over {0.1, 10.0}; for each value, batch sizes run from 1 to 320 in steps of 63.
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X64, prescale) {
  TEST_REQUIRES_X86_AVX512F;
  const std::vector<float> prescale_values{0.1f, 10.0f};
  for (const float prescale : prescale_values) {
    for (size_t n = 1; n <= 320; n += 63) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .prescale(prescale)
        .Test(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x64, xnn_init_f32_elu_avx512_rr1_lut16_p3_params);
    }
  }
}
9123 
// Sweeps alpha over {0.3, 3.0}; for each value, batch sizes run from 1 to 320 in steps of 63.
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X64, alpha) {
  TEST_REQUIRES_X86_AVX512F;
  const std::vector<float> alpha_values{0.3f, 3.0f};
  for (const float alpha : alpha_values) {
    for (size_t n = 1; n <= 320; n += 63) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .alpha(alpha)
        .Test(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x64, xnn_init_f32_elu_avx512_rr1_lut16_p3_params);
    }
  }
}
9135 
// Sweeps beta over {0.3, 3.0}; for each value, batch sizes run from 1 to 320 in steps of 63.
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X64, beta) {
  TEST_REQUIRES_X86_AVX512F;
  const std::vector<float> beta_values{0.3f, 3.0f};
  for (const float beta : beta_values) {
    for (size_t n = 1; n <= 320; n += 63) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .beta(beta)
        .Test(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x64, xnn_init_f32_elu_avx512_rr1_lut16_p3_params);
    }
  }
}
9147 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
9148 
9149 
9150 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the x80 kernel once on a batch of exactly 80 elements.
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X80, batch_eq_80) {
  TEST_REQUIRES_X86_AVX512F;
  const auto ukernel = xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x80;
  const auto init_params = xnn_init_f32_elu_avx512_rr1_lut16_p3_params;
  VUnaryMicrokernelTester().batch_size(80).Test(ukernel, init_params);
}
9157 
// Covers batch sizes that are whole multiples of 80, from 2*80 (160) through 9*80 (720).
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X80, batch_div_80) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t multiple = 2; multiple < 10; ++multiple) {
    VUnaryMicrokernelTester()
      .batch_size(multiple * 80)
      .Test(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x80, xnn_init_f32_elu_avx512_rr1_lut16_p3_params);
  }
}
9166 
// Covers every batch size smaller than 80 (1 through 79).
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X80, batch_lt_80) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 1; n < 80; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x80,
        xnn_init_f32_elu_avx512_rr1_lut16_p3_params);
  }
}
9175 
// Covers every batch size strictly between 80 and 160 (81 through 159).
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X80, batch_gt_80) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 81; n < 160; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x80,
        xnn_init_f32_elu_avx512_rr1_lut16_p3_params);
  }
}
9184 
// Runs with inplace(true) for batch sizes from 1 to 400 in steps of 79.
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X80, inplace) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 1; n <= 400; n += 79) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x80,
        xnn_init_f32_elu_avx512_rr1_lut16_p3_params);
  }
}
9194 
// Sweeps prescale over {0.1, 10.0}; for each value, batch sizes run from 1 to 400 in steps of 79.
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X80, prescale) {
  TEST_REQUIRES_X86_AVX512F;
  const std::vector<float> prescale_values{0.1f, 10.0f};
  for (const float prescale : prescale_values) {
    for (size_t n = 1; n <= 400; n += 79) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .prescale(prescale)
        .Test(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x80, xnn_init_f32_elu_avx512_rr1_lut16_p3_params);
    }
  }
}
9206 
// Sweeps alpha over {0.3, 3.0}; for each value, batch sizes run from 1 to 400 in steps of 79.
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X80, alpha) {
  TEST_REQUIRES_X86_AVX512F;
  const std::vector<float> alpha_values{0.3f, 3.0f};
  for (const float alpha : alpha_values) {
    for (size_t n = 1; n <= 400; n += 79) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .alpha(alpha)
        .Test(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x80, xnn_init_f32_elu_avx512_rr1_lut16_p3_params);
    }
  }
}
9218 
// Sweeps beta over {0.3, 3.0}; for each value, batch sizes run from 1 to 400 in steps of 79.
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X80, beta) {
  TEST_REQUIRES_X86_AVX512F;
  const std::vector<float> beta_values{0.3f, 3.0f};
  for (const float beta : beta_values) {
    for (size_t n = 1; n <= 400; n += 79) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .beta(beta)
        .Test(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x80, xnn_init_f32_elu_avx512_rr1_lut16_p3_params);
    }
  }
}
9230 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
9231 
9232 
9233 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the x96 kernel once on a batch of exactly 96 elements.
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X96, batch_eq_96) {
  TEST_REQUIRES_X86_AVX512F;
  const auto ukernel = xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x96;
  const auto init_params = xnn_init_f32_elu_avx512_rr1_lut16_p3_params;
  VUnaryMicrokernelTester().batch_size(96).Test(ukernel, init_params);
}
9240 
// Covers batch sizes that are whole multiples of 96, from 2*96 (192) through 9*96 (864).
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X96, batch_div_96) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t multiple = 2; multiple < 10; ++multiple) {
    VUnaryMicrokernelTester()
      .batch_size(multiple * 96)
      .Test(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x96, xnn_init_f32_elu_avx512_rr1_lut16_p3_params);
  }
}
9249 
// Covers every batch size smaller than 96 (1 through 95).
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X96, batch_lt_96) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 1; n < 96; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x96,
        xnn_init_f32_elu_avx512_rr1_lut16_p3_params);
  }
}
9258 
// Covers every batch size strictly between 96 and 192 (97 through 191).
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X96, batch_gt_96) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 97; n < 192; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x96,
        xnn_init_f32_elu_avx512_rr1_lut16_p3_params);
  }
}
9267 
// Runs with inplace(true) for batch sizes from 1 to 480 in steps of 95.
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X96, inplace) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 1; n <= 480; n += 95) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x96,
        xnn_init_f32_elu_avx512_rr1_lut16_p3_params);
  }
}
9277 
// Sweeps prescale over {0.1, 10.0}; for each value, batch sizes run from 1 to 480 in steps of 95.
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X96, prescale) {
  TEST_REQUIRES_X86_AVX512F;
  const std::vector<float> prescale_values{0.1f, 10.0f};
  for (const float prescale : prescale_values) {
    for (size_t n = 1; n <= 480; n += 95) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .prescale(prescale)
        .Test(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x96, xnn_init_f32_elu_avx512_rr1_lut16_p3_params);
    }
  }
}
9289 
// Sweeps alpha over {0.3, 3.0}; for each value, batch sizes run from 1 to 480 in steps of 95.
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X96, alpha) {
  TEST_REQUIRES_X86_AVX512F;
  const std::vector<float> alpha_values{0.3f, 3.0f};
  for (const float alpha : alpha_values) {
    for (size_t n = 1; n <= 480; n += 95) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .alpha(alpha)
        .Test(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x96, xnn_init_f32_elu_avx512_rr1_lut16_p3_params);
    }
  }
}
9301 
// Sweeps beta over {0.3, 3.0}; for each value, batch sizes run from 1 to 480 in steps of 95.
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X96, beta) {
  TEST_REQUIRES_X86_AVX512F;
  const std::vector<float> beta_values{0.3f, 3.0f};
  for (const float beta : beta_values) {
    for (size_t n = 1; n <= 480; n += 95) {
      VUnaryMicrokernelTester()
        .batch_size(n)
        .beta(beta)
        .Test(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x96, xnn_init_f32_elu_avx512_rr1_lut16_p3_params);
    }
  }
}
9313 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
9314 
9315 
9316 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the x112 kernel once on a batch of exactly 112 elements.
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X112, batch_eq_112) {
  TEST_REQUIRES_X86_AVX512F;
  const auto ukernel = xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x112;
  const auto init_params = xnn_init_f32_elu_avx512_rr1_lut16_p3_params;
  VUnaryMicrokernelTester().batch_size(112).Test(ukernel, init_params);
}
9323 
// Covers batch sizes that are whole multiples of 112, from 2*112 (224) through 9*112 (1008).
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X112, batch_div_112) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t multiple = 2; multiple < 10; ++multiple) {
    VUnaryMicrokernelTester()
      .batch_size(multiple * 112)
      .Test(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x112, xnn_init_f32_elu_avx512_rr1_lut16_p3_params);
  }
}
9332 
// Covers every batch size smaller than 112 (1 through 111).
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X112, batch_lt_112) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 1; n < 112; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x112,
        xnn_init_f32_elu_avx512_rr1_lut16_p3_params);
  }
}
9341 
// Covers every batch size strictly between 112 and 224 (113 through 223).
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X112, batch_gt_112) {
  TEST_REQUIRES_X86_AVX512F;
  for (size_t n = 113; n < 224; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x112,
        xnn_init_f32_elu_avx512_rr1_lut16_p3_params);
  }
}
9350 
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X112, inplace) {
  // Runs with the tester's inplace flag set for batch sizes
  // 1, 112, 223, 334, 445 and 556.
  TEST_REQUIRES_X86_AVX512F;
  size_t element_count = 1;
  while (element_count <= 560) {
    VUnaryMicrokernelTester().batch_size(element_count).inplace(true).Test(
      xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x112,
      xnn_init_f32_elu_avx512_rr1_lut16_p3_params);
    element_count += 111;
  }
}
9360 
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X112, prescale) {
  // Sweeps prescale over 0.1f and 10.0f; for each value runs batch sizes
  // 1, 112, 223, 334, 445 and 556.
  TEST_REQUIRES_X86_AVX512F;
  const std::vector<float> prescale_values({0.1f, 10.0f});
  for (const float prescale : prescale_values) {
    for (size_t element_count = 1; element_count <= 560; element_count += 111) {
      VUnaryMicrokernelTester().batch_size(element_count).prescale(prescale).Test(
        xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x112,
        xnn_init_f32_elu_avx512_rr1_lut16_p3_params);
    }
  }
}
9372 
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X112, alpha) {
  // Sweeps alpha over 0.3f and 3.0f; for each value runs batch sizes
  // 1, 112, 223, 334, 445 and 556.
  TEST_REQUIRES_X86_AVX512F;
  const std::vector<float> alpha_values({0.3f, 3.0f});
  for (const float alpha : alpha_values) {
    for (size_t element_count = 1; element_count <= 560; element_count += 111) {
      VUnaryMicrokernelTester().batch_size(element_count).alpha(alpha).Test(
        xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x112,
        xnn_init_f32_elu_avx512_rr1_lut16_p3_params);
    }
  }
}
9384 
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X112, beta) {
  // Sweeps beta over 0.3f and 3.0f; for each value runs batch sizes
  // 1, 112, 223, 334, 445 and 556.
  TEST_REQUIRES_X86_AVX512F;
  const std::vector<float> beta_values({0.3f, 3.0f});
  for (const float beta : beta_values) {
    for (size_t element_count = 1; element_count <= 560; element_count += 111) {
      VUnaryMicrokernelTester().batch_size(element_count).beta(beta).Test(
        xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x112,
        xnn_init_f32_elu_avx512_rr1_lut16_p3_params);
    }
  }
}
9396 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
9397 
9398 
9399 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X128, batch_eq_128) {
  // Single run with exactly 128 elements, the count in the kernel's _x128 suffix.
  TEST_REQUIRES_X86_AVX512F;
  const size_t element_count = 128;
  VUnaryMicrokernelTester().batch_size(element_count).Test(
    xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x128,
    xnn_init_f32_elu_avx512_rr1_lut16_p3_params);
}
9406 
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X128, batch_div_128) {
  // Batch sizes that are whole multiples of 128: 256, 384, ..., 1152.
  TEST_REQUIRES_X86_AVX512F;
  for (size_t multiple = 2; multiple < 10; multiple++) {
    const size_t element_count = multiple * 128;
    VUnaryMicrokernelTester().batch_size(element_count).Test(
      xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x128,
      xnn_init_f32_elu_avx512_rr1_lut16_p3_params);
  }
}
9415 
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X128, batch_lt_128) {
  // Every batch size from 1 through 127, i.e. all sizes below 128.
  TEST_REQUIRES_X86_AVX512F;
  for (size_t element_count = 1; element_count <= 127; ++element_count) {
    VUnaryMicrokernelTester().batch_size(element_count).Test(
      xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x128,
      xnn_init_f32_elu_avx512_rr1_lut16_p3_params);
  }
}
9424 
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X128, batch_gt_128) {
  // Batch sizes 129 through 255: 128 plus a remainder of 1..127 elements.
  TEST_REQUIRES_X86_AVX512F;
  for (size_t remainder = 1; remainder < 128; remainder++) {
    const size_t element_count = 128 + remainder;
    VUnaryMicrokernelTester().batch_size(element_count).Test(
      xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x128,
      xnn_init_f32_elu_avx512_rr1_lut16_p3_params);
  }
}
9433 
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X128, inplace) {
  // Runs with the tester's inplace flag set for batch sizes
  // 1, 128, 255, 382, 509 and 636.
  TEST_REQUIRES_X86_AVX512F;
  size_t element_count = 1;
  while (element_count <= 640) {
    VUnaryMicrokernelTester().batch_size(element_count).inplace(true).Test(
      xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x128,
      xnn_init_f32_elu_avx512_rr1_lut16_p3_params);
    element_count += 127;
  }
}
9443 
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X128, prescale) {
  // Sweeps prescale over 0.1f and 10.0f; for each value runs batch sizes
  // 1, 128, 255, 382, 509 and 636.
  TEST_REQUIRES_X86_AVX512F;
  const std::vector<float> prescale_values({0.1f, 10.0f});
  for (const float prescale : prescale_values) {
    for (size_t element_count = 1; element_count <= 640; element_count += 127) {
      VUnaryMicrokernelTester().batch_size(element_count).prescale(prescale).Test(
        xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x128,
        xnn_init_f32_elu_avx512_rr1_lut16_p3_params);
    }
  }
}
9455 
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X128, alpha) {
  // Sweeps alpha over 0.3f and 3.0f; for each value runs batch sizes
  // 1, 128, 255, 382, 509 and 636.
  TEST_REQUIRES_X86_AVX512F;
  const std::vector<float> alpha_values({0.3f, 3.0f});
  for (const float alpha : alpha_values) {
    for (size_t element_count = 1; element_count <= 640; element_count += 127) {
      VUnaryMicrokernelTester().batch_size(element_count).alpha(alpha).Test(
        xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x128,
        xnn_init_f32_elu_avx512_rr1_lut16_p3_params);
    }
  }
}
9467 
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X128, beta) {
  // Sweeps beta over 0.3f and 3.0f; for each value runs batch sizes
  // 1, 128, 255, 382, 509 and 636.
  TEST_REQUIRES_X86_AVX512F;
  const std::vector<float> beta_values({0.3f, 3.0f});
  for (const float beta : beta_values) {
    for (size_t element_count = 1; element_count <= 640; element_count += 127) {
      VUnaryMicrokernelTester().batch_size(element_count).beta(beta).Test(
        xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x128,
        xnn_init_f32_elu_avx512_rr1_lut16_p3_params);
    }
  }
}
9479 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
9480 
9481 
9482 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(F32_VELU__AVX512F_RR1_P6_X16, batch_eq_16) {
  // Single run with exactly 16 elements, the count in the kernel's _x16 suffix.
  TEST_REQUIRES_X86_AVX512F;
  const size_t element_count = 16;
  VUnaryMicrokernelTester().batch_size(element_count).Test(
    xnn_f32_velu_ukernel__avx512f_rr1_p6_x16,
    xnn_init_f32_elu_avx512_rr1_p6_params);
}
9489 
TEST(F32_VELU__AVX512F_RR1_P6_X16, batch_div_16) {
  // Batch sizes that are whole multiples of 16: 32, 48, ..., 144.
  TEST_REQUIRES_X86_AVX512F;
  for (size_t multiple = 2; multiple < 10; multiple++) {
    const size_t element_count = multiple * 16;
    VUnaryMicrokernelTester().batch_size(element_count).Test(
      xnn_f32_velu_ukernel__avx512f_rr1_p6_x16,
      xnn_init_f32_elu_avx512_rr1_p6_params);
  }
}
9498 
TEST(F32_VELU__AVX512F_RR1_P6_X16, batch_lt_16) {
  // Every batch size from 1 through 15, i.e. all sizes below 16.
  TEST_REQUIRES_X86_AVX512F;
  for (size_t element_count = 1; element_count <= 15; ++element_count) {
    VUnaryMicrokernelTester().batch_size(element_count).Test(
      xnn_f32_velu_ukernel__avx512f_rr1_p6_x16,
      xnn_init_f32_elu_avx512_rr1_p6_params);
  }
}
9507 
TEST(F32_VELU__AVX512F_RR1_P6_X16, batch_gt_16) {
  // Batch sizes 17 through 31: 16 plus a remainder of 1..15 elements.
  TEST_REQUIRES_X86_AVX512F;
  for (size_t remainder = 1; remainder < 16; remainder++) {
    const size_t element_count = 16 + remainder;
    VUnaryMicrokernelTester().batch_size(element_count).Test(
      xnn_f32_velu_ukernel__avx512f_rr1_p6_x16,
      xnn_init_f32_elu_avx512_rr1_p6_params);
  }
}
9516 
TEST(F32_VELU__AVX512F_RR1_P6_X16, inplace) {
  // Runs with the tester's inplace flag set for batch sizes
  // 1, 16, 31, 46, 61 and 76.
  TEST_REQUIRES_X86_AVX512F;
  size_t element_count = 1;
  while (element_count <= 80) {
    VUnaryMicrokernelTester().batch_size(element_count).inplace(true).Test(
      xnn_f32_velu_ukernel__avx512f_rr1_p6_x16,
      xnn_init_f32_elu_avx512_rr1_p6_params);
    element_count += 15;
  }
}
9526 
TEST(F32_VELU__AVX512F_RR1_P6_X16, prescale) {
  // Sweeps prescale over 0.1f and 10.0f; for each value runs batch sizes
  // 1, 16, 31, 46, 61 and 76.
  TEST_REQUIRES_X86_AVX512F;
  const std::vector<float> prescale_values({0.1f, 10.0f});
  for (const float prescale : prescale_values) {
    for (size_t element_count = 1; element_count <= 80; element_count += 15) {
      VUnaryMicrokernelTester().batch_size(element_count).prescale(prescale).Test(
        xnn_f32_velu_ukernel__avx512f_rr1_p6_x16,
        xnn_init_f32_elu_avx512_rr1_p6_params);
    }
  }
}
9538 
TEST(F32_VELU__AVX512F_RR1_P6_X16, alpha) {
  // Sweeps alpha over 0.3f and 3.0f; for each value runs batch sizes
  // 1, 16, 31, 46, 61 and 76.
  TEST_REQUIRES_X86_AVX512F;
  const std::vector<float> alpha_values({0.3f, 3.0f});
  for (const float alpha : alpha_values) {
    for (size_t element_count = 1; element_count <= 80; element_count += 15) {
      VUnaryMicrokernelTester().batch_size(element_count).alpha(alpha).Test(
        xnn_f32_velu_ukernel__avx512f_rr1_p6_x16,
        xnn_init_f32_elu_avx512_rr1_p6_params);
    }
  }
}
9550 
TEST(F32_VELU__AVX512F_RR1_P6_X16, beta) {
  // Sweeps beta over 0.3f and 3.0f; for each value runs batch sizes
  // 1, 16, 31, 46, 61 and 76.
  TEST_REQUIRES_X86_AVX512F;
  const std::vector<float> beta_values({0.3f, 3.0f});
  for (const float beta : beta_values) {
    for (size_t element_count = 1; element_count <= 80; element_count += 15) {
      VUnaryMicrokernelTester().batch_size(element_count).beta(beta).Test(
        xnn_f32_velu_ukernel__avx512f_rr1_p6_x16,
        xnn_init_f32_elu_avx512_rr1_p6_params);
    }
  }
}
9562 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
9563 
9564 
9565 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(F32_VELU__AVX512F_RR1_P6_X32, batch_eq_32) {
  // Single run with exactly 32 elements, the count in the kernel's _x32 suffix.
  TEST_REQUIRES_X86_AVX512F;
  const size_t element_count = 32;
  VUnaryMicrokernelTester().batch_size(element_count).Test(
    xnn_f32_velu_ukernel__avx512f_rr1_p6_x32,
    xnn_init_f32_elu_avx512_rr1_p6_params);
}
9572 
TEST(F32_VELU__AVX512F_RR1_P6_X32, batch_div_32) {
  // Batch sizes that are whole multiples of 32: 64, 96, ..., 288.
  TEST_REQUIRES_X86_AVX512F;
  for (size_t multiple = 2; multiple < 10; multiple++) {
    const size_t element_count = multiple * 32;
    VUnaryMicrokernelTester().batch_size(element_count).Test(
      xnn_f32_velu_ukernel__avx512f_rr1_p6_x32,
      xnn_init_f32_elu_avx512_rr1_p6_params);
  }
}
9581 
TEST(F32_VELU__AVX512F_RR1_P6_X32, batch_lt_32) {
  // Every batch size from 1 through 31, i.e. all sizes below 32.
  TEST_REQUIRES_X86_AVX512F;
  for (size_t element_count = 1; element_count <= 31; ++element_count) {
    VUnaryMicrokernelTester().batch_size(element_count).Test(
      xnn_f32_velu_ukernel__avx512f_rr1_p6_x32,
      xnn_init_f32_elu_avx512_rr1_p6_params);
  }
}
9590 
TEST(F32_VELU__AVX512F_RR1_P6_X32, batch_gt_32) {
  // Batch sizes 33 through 63: 32 plus a remainder of 1..31 elements.
  TEST_REQUIRES_X86_AVX512F;
  for (size_t remainder = 1; remainder < 32; remainder++) {
    const size_t element_count = 32 + remainder;
    VUnaryMicrokernelTester().batch_size(element_count).Test(
      xnn_f32_velu_ukernel__avx512f_rr1_p6_x32,
      xnn_init_f32_elu_avx512_rr1_p6_params);
  }
}
9599 
TEST(F32_VELU__AVX512F_RR1_P6_X32, inplace) {
  // Runs with the tester's inplace flag set for batch sizes
  // 1, 32, 63, 94, 125 and 156.
  TEST_REQUIRES_X86_AVX512F;
  size_t element_count = 1;
  while (element_count <= 160) {
    VUnaryMicrokernelTester().batch_size(element_count).inplace(true).Test(
      xnn_f32_velu_ukernel__avx512f_rr1_p6_x32,
      xnn_init_f32_elu_avx512_rr1_p6_params);
    element_count += 31;
  }
}
9609 
TEST(F32_VELU__AVX512F_RR1_P6_X32, prescale) {
  // Sweeps prescale over 0.1f and 10.0f; for each value runs batch sizes
  // 1, 32, 63, 94, 125 and 156.
  TEST_REQUIRES_X86_AVX512F;
  const std::vector<float> prescale_values({0.1f, 10.0f});
  for (const float prescale : prescale_values) {
    for (size_t element_count = 1; element_count <= 160; element_count += 31) {
      VUnaryMicrokernelTester().batch_size(element_count).prescale(prescale).Test(
        xnn_f32_velu_ukernel__avx512f_rr1_p6_x32,
        xnn_init_f32_elu_avx512_rr1_p6_params);
    }
  }
}
9621 
TEST(F32_VELU__AVX512F_RR1_P6_X32, alpha) {
  // Sweeps alpha over 0.3f and 3.0f; for each value runs batch sizes
  // 1, 32, 63, 94, 125 and 156.
  TEST_REQUIRES_X86_AVX512F;
  const std::vector<float> alpha_values({0.3f, 3.0f});
  for (const float alpha : alpha_values) {
    for (size_t element_count = 1; element_count <= 160; element_count += 31) {
      VUnaryMicrokernelTester().batch_size(element_count).alpha(alpha).Test(
        xnn_f32_velu_ukernel__avx512f_rr1_p6_x32,
        xnn_init_f32_elu_avx512_rr1_p6_params);
    }
  }
}
9633 
TEST(F32_VELU__AVX512F_RR1_P6_X32, beta) {
  // Sweeps beta over 0.3f and 3.0f; for each value runs batch sizes
  // 1, 32, 63, 94, 125 and 156.
  TEST_REQUIRES_X86_AVX512F;
  const std::vector<float> beta_values({0.3f, 3.0f});
  for (const float beta : beta_values) {
    for (size_t element_count = 1; element_count <= 160; element_count += 31) {
      VUnaryMicrokernelTester().batch_size(element_count).beta(beta).Test(
        xnn_f32_velu_ukernel__avx512f_rr1_p6_x32,
        xnn_init_f32_elu_avx512_rr1_p6_params);
    }
  }
}
9645 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
9646 
9647 
9648 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(F32_VELU__AVX512F_RR1_P6_X48, batch_eq_48) {
  // Single run with exactly 48 elements, the count in the kernel's _x48 suffix.
  TEST_REQUIRES_X86_AVX512F;
  const size_t element_count = 48;
  VUnaryMicrokernelTester().batch_size(element_count).Test(
    xnn_f32_velu_ukernel__avx512f_rr1_p6_x48,
    xnn_init_f32_elu_avx512_rr1_p6_params);
}
9655 
TEST(F32_VELU__AVX512F_RR1_P6_X48, batch_div_48) {
  // Batch sizes that are whole multiples of 48: 96, 144, ..., 432.
  TEST_REQUIRES_X86_AVX512F;
  for (size_t multiple = 2; multiple < 10; multiple++) {
    const size_t element_count = multiple * 48;
    VUnaryMicrokernelTester().batch_size(element_count).Test(
      xnn_f32_velu_ukernel__avx512f_rr1_p6_x48,
      xnn_init_f32_elu_avx512_rr1_p6_params);
  }
}
9664 
TEST(F32_VELU__AVX512F_RR1_P6_X48, batch_lt_48) {
  // Every batch size from 1 through 47, i.e. all sizes below 48.
  TEST_REQUIRES_X86_AVX512F;
  for (size_t element_count = 1; element_count <= 47; ++element_count) {
    VUnaryMicrokernelTester().batch_size(element_count).Test(
      xnn_f32_velu_ukernel__avx512f_rr1_p6_x48,
      xnn_init_f32_elu_avx512_rr1_p6_params);
  }
}
9673 
TEST(F32_VELU__AVX512F_RR1_P6_X48, batch_gt_48) {
  // Batch sizes 49 through 95: 48 plus a remainder of 1..47 elements.
  TEST_REQUIRES_X86_AVX512F;
  for (size_t remainder = 1; remainder < 48; remainder++) {
    const size_t element_count = 48 + remainder;
    VUnaryMicrokernelTester().batch_size(element_count).Test(
      xnn_f32_velu_ukernel__avx512f_rr1_p6_x48,
      xnn_init_f32_elu_avx512_rr1_p6_params);
  }
}
9682 
TEST(F32_VELU__AVX512F_RR1_P6_X48, inplace) {
  // Runs with the tester's inplace flag set for batch sizes
  // 1, 48, 95, 142, 189 and 236.
  TEST_REQUIRES_X86_AVX512F;
  size_t element_count = 1;
  while (element_count <= 240) {
    VUnaryMicrokernelTester().batch_size(element_count).inplace(true).Test(
      xnn_f32_velu_ukernel__avx512f_rr1_p6_x48,
      xnn_init_f32_elu_avx512_rr1_p6_params);
    element_count += 47;
  }
}
9692 
TEST(F32_VELU__AVX512F_RR1_P6_X48, prescale) {
  // Sweeps prescale over 0.1f and 10.0f; for each value runs batch sizes
  // 1, 48, 95, 142, 189 and 236.
  TEST_REQUIRES_X86_AVX512F;
  const std::vector<float> prescale_values({0.1f, 10.0f});
  for (const float prescale : prescale_values) {
    for (size_t element_count = 1; element_count <= 240; element_count += 47) {
      VUnaryMicrokernelTester().batch_size(element_count).prescale(prescale).Test(
        xnn_f32_velu_ukernel__avx512f_rr1_p6_x48,
        xnn_init_f32_elu_avx512_rr1_p6_params);
    }
  }
}
9704 
TEST(F32_VELU__AVX512F_RR1_P6_X48, alpha) {
  // Sweeps alpha over 0.3f and 3.0f; for each value runs batch sizes
  // 1, 48, 95, 142, 189 and 236.
  TEST_REQUIRES_X86_AVX512F;
  const std::vector<float> alpha_values({0.3f, 3.0f});
  for (const float alpha : alpha_values) {
    for (size_t element_count = 1; element_count <= 240; element_count += 47) {
      VUnaryMicrokernelTester().batch_size(element_count).alpha(alpha).Test(
        xnn_f32_velu_ukernel__avx512f_rr1_p6_x48,
        xnn_init_f32_elu_avx512_rr1_p6_params);
    }
  }
}
9716 
TEST(F32_VELU__AVX512F_RR1_P6_X48, beta) {
  // Sweeps beta over 0.3f and 3.0f; for each value runs batch sizes
  // 1, 48, 95, 142, 189 and 236.
  TEST_REQUIRES_X86_AVX512F;
  const std::vector<float> beta_values({0.3f, 3.0f});
  for (const float beta : beta_values) {
    for (size_t element_count = 1; element_count <= 240; element_count += 47) {
      VUnaryMicrokernelTester().batch_size(element_count).beta(beta).Test(
        xnn_f32_velu_ukernel__avx512f_rr1_p6_x48,
        xnn_init_f32_elu_avx512_rr1_p6_params);
    }
  }
}
9728 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
9729 
9730 
9731 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(F32_VELU__AVX512F_RR1_P6_X64, batch_eq_64) {
  // Single run with exactly 64 elements, the count in the kernel's _x64 suffix.
  TEST_REQUIRES_X86_AVX512F;
  const size_t element_count = 64;
  VUnaryMicrokernelTester().batch_size(element_count).Test(
    xnn_f32_velu_ukernel__avx512f_rr1_p6_x64,
    xnn_init_f32_elu_avx512_rr1_p6_params);
}
9738 
TEST(F32_VELU__AVX512F_RR1_P6_X64, batch_div_64) {
  // Batch sizes that are whole multiples of 64: 128, 192, ..., 576.
  TEST_REQUIRES_X86_AVX512F;
  for (size_t multiple = 2; multiple < 10; multiple++) {
    const size_t element_count = multiple * 64;
    VUnaryMicrokernelTester().batch_size(element_count).Test(
      xnn_f32_velu_ukernel__avx512f_rr1_p6_x64,
      xnn_init_f32_elu_avx512_rr1_p6_params);
  }
}
9747 
TEST(F32_VELU__AVX512F_RR1_P6_X64, batch_lt_64) {
  // Every batch size from 1 through 63, i.e. all sizes below 64.
  TEST_REQUIRES_X86_AVX512F;
  for (size_t element_count = 1; element_count <= 63; ++element_count) {
    VUnaryMicrokernelTester().batch_size(element_count).Test(
      xnn_f32_velu_ukernel__avx512f_rr1_p6_x64,
      xnn_init_f32_elu_avx512_rr1_p6_params);
  }
}
9756 
TEST(F32_VELU__AVX512F_RR1_P6_X64, batch_gt_64) {
  // Batch sizes 65 through 127: 64 plus a remainder of 1..63 elements.
  TEST_REQUIRES_X86_AVX512F;
  for (size_t remainder = 1; remainder < 64; remainder++) {
    const size_t element_count = 64 + remainder;
    VUnaryMicrokernelTester().batch_size(element_count).Test(
      xnn_f32_velu_ukernel__avx512f_rr1_p6_x64,
      xnn_init_f32_elu_avx512_rr1_p6_params);
  }
}
9765 
TEST(F32_VELU__AVX512F_RR1_P6_X64, inplace) {
  // Runs with the tester's inplace flag set for batch sizes
  // 1, 64, 127, 190, 253 and 316.
  TEST_REQUIRES_X86_AVX512F;
  size_t element_count = 1;
  while (element_count <= 320) {
    VUnaryMicrokernelTester().batch_size(element_count).inplace(true).Test(
      xnn_f32_velu_ukernel__avx512f_rr1_p6_x64,
      xnn_init_f32_elu_avx512_rr1_p6_params);
    element_count += 63;
  }
}
9775 
TEST(F32_VELU__AVX512F_RR1_P6_X64, prescale) {
  // Sweeps prescale over 0.1f and 10.0f; for each value runs batch sizes
  // 1, 64, 127, 190, 253 and 316.
  TEST_REQUIRES_X86_AVX512F;
  const std::vector<float> prescale_values({0.1f, 10.0f});
  for (const float prescale : prescale_values) {
    for (size_t element_count = 1; element_count <= 320; element_count += 63) {
      VUnaryMicrokernelTester().batch_size(element_count).prescale(prescale).Test(
        xnn_f32_velu_ukernel__avx512f_rr1_p6_x64,
        xnn_init_f32_elu_avx512_rr1_p6_params);
    }
  }
}
9787 
TEST(F32_VELU__AVX512F_RR1_P6_X64, alpha) {
  // Sweeps alpha over 0.3f and 3.0f; for each value runs batch sizes
  // 1, 64, 127, 190, 253 and 316.
  TEST_REQUIRES_X86_AVX512F;
  const std::vector<float> alpha_values({0.3f, 3.0f});
  for (const float alpha : alpha_values) {
    for (size_t element_count = 1; element_count <= 320; element_count += 63) {
      VUnaryMicrokernelTester().batch_size(element_count).alpha(alpha).Test(
        xnn_f32_velu_ukernel__avx512f_rr1_p6_x64,
        xnn_init_f32_elu_avx512_rr1_p6_params);
    }
  }
}
9799 
TEST(F32_VELU__AVX512F_RR1_P6_X64, beta) {
  // Sweeps beta over 0.3f and 3.0f; for each value runs batch sizes
  // 1, 64, 127, 190, 253 and 316.
  TEST_REQUIRES_X86_AVX512F;
  const std::vector<float> beta_values({0.3f, 3.0f});
  for (const float beta : beta_values) {
    for (size_t element_count = 1; element_count <= 320; element_count += 63) {
      VUnaryMicrokernelTester().batch_size(element_count).beta(beta).Test(
        xnn_f32_velu_ukernel__avx512f_rr1_p6_x64,
        xnn_init_f32_elu_avx512_rr1_p6_params);
    }
  }
}
9811 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
9812 
9813 
9814 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(F32_VELU__AVX512F_RR1_P6_X80, batch_eq_80) {
  // Single run with exactly 80 elements, the count in the kernel's _x80 suffix.
  TEST_REQUIRES_X86_AVX512F;
  const size_t element_count = 80;
  VUnaryMicrokernelTester().batch_size(element_count).Test(
    xnn_f32_velu_ukernel__avx512f_rr1_p6_x80,
    xnn_init_f32_elu_avx512_rr1_p6_params);
}
9821 
TEST(F32_VELU__AVX512F_RR1_P6_X80, batch_div_80) {
  // Batch sizes that are whole multiples of 80: 160, 240, ..., 720.
  TEST_REQUIRES_X86_AVX512F;
  for (size_t multiple = 2; multiple < 10; multiple++) {
    const size_t element_count = multiple * 80;
    VUnaryMicrokernelTester().batch_size(element_count).Test(
      xnn_f32_velu_ukernel__avx512f_rr1_p6_x80,
      xnn_init_f32_elu_avx512_rr1_p6_params);
  }
}
9830 
TEST(F32_VELU__AVX512F_RR1_P6_X80, batch_lt_80) {
  // Every batch size from 1 through 79, i.e. all sizes below 80.
  TEST_REQUIRES_X86_AVX512F;
  for (size_t element_count = 1; element_count <= 79; ++element_count) {
    VUnaryMicrokernelTester().batch_size(element_count).Test(
      xnn_f32_velu_ukernel__avx512f_rr1_p6_x80,
      xnn_init_f32_elu_avx512_rr1_p6_params);
  }
}
9839 
TEST(F32_VELU__AVX512F_RR1_P6_X80, batch_gt_80) {
  // Batch sizes 81 through 159: 80 plus a remainder of 1..79 elements.
  TEST_REQUIRES_X86_AVX512F;
  for (size_t remainder = 1; remainder < 80; remainder++) {
    const size_t element_count = 80 + remainder;
    VUnaryMicrokernelTester().batch_size(element_count).Test(
      xnn_f32_velu_ukernel__avx512f_rr1_p6_x80,
      xnn_init_f32_elu_avx512_rr1_p6_params);
  }
}
9848 
TEST(F32_VELU__AVX512F_RR1_P6_X80, inplace) {
  // Runs with the tester's inplace flag set for batch sizes
  // 1, 80, 159, 238, 317 and 396.
  TEST_REQUIRES_X86_AVX512F;
  size_t element_count = 1;
  while (element_count <= 400) {
    VUnaryMicrokernelTester().batch_size(element_count).inplace(true).Test(
      xnn_f32_velu_ukernel__avx512f_rr1_p6_x80,
      xnn_init_f32_elu_avx512_rr1_p6_params);
    element_count += 79;
  }
}
9858 
TEST(F32_VELU__AVX512F_RR1_P6_X80, prescale) {
  // Sweeps prescale over 0.1f and 10.0f; for each value runs batch sizes
  // 1, 80, 159, 238, 317 and 396.
  TEST_REQUIRES_X86_AVX512F;
  const std::vector<float> prescale_values({0.1f, 10.0f});
  for (const float prescale : prescale_values) {
    for (size_t element_count = 1; element_count <= 400; element_count += 79) {
      VUnaryMicrokernelTester().batch_size(element_count).prescale(prescale).Test(
        xnn_f32_velu_ukernel__avx512f_rr1_p6_x80,
        xnn_init_f32_elu_avx512_rr1_p6_params);
    }
  }
}
9870 
TEST(F32_VELU__AVX512F_RR1_P6_X80, alpha) {
  // Sweeps alpha over 0.3f and 3.0f; for each value runs batch sizes
  // 1, 80, 159, 238, 317 and 396.
  TEST_REQUIRES_X86_AVX512F;
  const std::vector<float> alpha_values({0.3f, 3.0f});
  for (const float alpha : alpha_values) {
    for (size_t element_count = 1; element_count <= 400; element_count += 79) {
      VUnaryMicrokernelTester().batch_size(element_count).alpha(alpha).Test(
        xnn_f32_velu_ukernel__avx512f_rr1_p6_x80,
        xnn_init_f32_elu_avx512_rr1_p6_params);
    }
  }
}
9882 
TEST(F32_VELU__AVX512F_RR1_P6_X80, beta) {
  // Sweeps beta over 0.3f and 3.0f; for each value runs batch sizes
  // 1, 80, 159, 238, 317 and 396.
  TEST_REQUIRES_X86_AVX512F;
  const std::vector<float> beta_values({0.3f, 3.0f});
  for (const float beta : beta_values) {
    for (size_t element_count = 1; element_count <= 400; element_count += 79) {
      VUnaryMicrokernelTester().batch_size(element_count).beta(beta).Test(
        xnn_f32_velu_ukernel__avx512f_rr1_p6_x80,
        xnn_init_f32_elu_avx512_rr1_p6_params);
    }
  }
}
9894 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
9895 
9896 
9897 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(F32_VELU__AVX512F_RR1_P6_X96, batch_eq_96) {
  // Single run with exactly 96 elements, the count in the kernel's _x96 suffix.
  TEST_REQUIRES_X86_AVX512F;
  const size_t element_count = 96;
  VUnaryMicrokernelTester().batch_size(element_count).Test(
    xnn_f32_velu_ukernel__avx512f_rr1_p6_x96,
    xnn_init_f32_elu_avx512_rr1_p6_params);
}
9904 
TEST(F32_VELU__AVX512F_RR1_P6_X96, batch_div_96) {
  // Batch sizes that are whole multiples of 96: 192, 288, ..., 864.
  TEST_REQUIRES_X86_AVX512F;
  for (size_t multiple = 2; multiple < 10; multiple++) {
    const size_t element_count = multiple * 96;
    VUnaryMicrokernelTester().batch_size(element_count).Test(
      xnn_f32_velu_ukernel__avx512f_rr1_p6_x96,
      xnn_init_f32_elu_avx512_rr1_p6_params);
  }
}
9913 
TEST(F32_VELU__AVX512F_RR1_P6_X96, batch_lt_96) {
  // Every batch size from 1 through 95, i.e. all sizes below 96.
  TEST_REQUIRES_X86_AVX512F;
  for (size_t element_count = 1; element_count <= 95; ++element_count) {
    VUnaryMicrokernelTester().batch_size(element_count).Test(
      xnn_f32_velu_ukernel__avx512f_rr1_p6_x96,
      xnn_init_f32_elu_avx512_rr1_p6_params);
  }
}
9922 
TEST(F32_VELU__AVX512F_RR1_P6_X96, batch_gt_96) {
  // Batch sizes 97 through 191: 96 plus a remainder of 1..95 elements.
  TEST_REQUIRES_X86_AVX512F;
  for (size_t remainder = 1; remainder < 96; remainder++) {
    const size_t element_count = 96 + remainder;
    VUnaryMicrokernelTester().batch_size(element_count).Test(
      xnn_f32_velu_ukernel__avx512f_rr1_p6_x96,
      xnn_init_f32_elu_avx512_rr1_p6_params);
  }
}
9931 
TEST(F32_VELU__AVX512F_RR1_P6_X96, inplace) {
  // Runs with the tester's inplace flag set for batch sizes
  // 1, 96, 191, 286, 381 and 476.
  TEST_REQUIRES_X86_AVX512F;
  size_t element_count = 1;
  while (element_count <= 480) {
    VUnaryMicrokernelTester().batch_size(element_count).inplace(true).Test(
      xnn_f32_velu_ukernel__avx512f_rr1_p6_x96,
      xnn_init_f32_elu_avx512_rr1_p6_params);
    element_count += 95;
  }
}
9941 
TEST(F32_VELU__AVX512F_RR1_P6_X96, prescale) {
  // Sweeps prescale over 0.1f and 10.0f; for each value runs batch sizes
  // 1, 96, 191, 286, 381 and 476.
  TEST_REQUIRES_X86_AVX512F;
  const std::vector<float> prescale_values({0.1f, 10.0f});
  for (const float prescale : prescale_values) {
    for (size_t element_count = 1; element_count <= 480; element_count += 95) {
      VUnaryMicrokernelTester().batch_size(element_count).prescale(prescale).Test(
        xnn_f32_velu_ukernel__avx512f_rr1_p6_x96,
        xnn_init_f32_elu_avx512_rr1_p6_params);
    }
  }
}
9953 
TEST(F32_VELU__AVX512F_RR1_P6_X96, alpha) {
  // Sweeps alpha over 0.3f and 3.0f; for each value runs batch sizes
  // 1, 96, 191, 286, 381 and 476.
  TEST_REQUIRES_X86_AVX512F;
  const std::vector<float> alpha_values({0.3f, 3.0f});
  for (const float alpha : alpha_values) {
    for (size_t element_count = 1; element_count <= 480; element_count += 95) {
      VUnaryMicrokernelTester().batch_size(element_count).alpha(alpha).Test(
        xnn_f32_velu_ukernel__avx512f_rr1_p6_x96,
        xnn_init_f32_elu_avx512_rr1_p6_params);
    }
  }
}
9965 
TEST(F32_VELU__AVX512F_RR1_P6_X96, beta) {
  // Sweeps beta over 0.3f and 3.0f; for each value runs batch sizes
  // 1, 96, 191, 286, 381 and 476.
  TEST_REQUIRES_X86_AVX512F;
  const std::vector<float> beta_values({0.3f, 3.0f});
  for (const float beta : beta_values) {
    for (size_t element_count = 1; element_count <= 480; element_count += 95) {
      VUnaryMicrokernelTester().batch_size(element_count).beta(beta).Test(
        xnn_f32_velu_ukernel__avx512f_rr1_p6_x96,
        xnn_init_f32_elu_avx512_rr1_p6_params);
    }
  }
}
9977 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
9978 
9979 
9980 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(F32_VELU__AVX512F_RR1_P6_X112, batch_eq_112) {
  // Single run with exactly 112 elements, the count in the kernel's _x112 suffix.
  TEST_REQUIRES_X86_AVX512F;
  const size_t element_count = 112;
  VUnaryMicrokernelTester().batch_size(element_count).Test(
    xnn_f32_velu_ukernel__avx512f_rr1_p6_x112,
    xnn_init_f32_elu_avx512_rr1_p6_params);
}
9987 
TEST(F32_VELU__AVX512F_RR1_P6_X112, batch_div_112) {
  // Batch sizes that are whole multiples of 112: 224, 336, ..., 1008.
  TEST_REQUIRES_X86_AVX512F;
  for (size_t multiple = 2; multiple < 10; multiple++) {
    const size_t element_count = multiple * 112;
    VUnaryMicrokernelTester().batch_size(element_count).Test(
      xnn_f32_velu_ukernel__avx512f_rr1_p6_x112,
      xnn_init_f32_elu_avx512_rr1_p6_params);
  }
}
9996 
TEST(F32_VELU__AVX512F_RR1_P6_X112, batch_lt_112) {
  // Every batch size from 1 through 111, i.e. all sizes below 112.
  TEST_REQUIRES_X86_AVX512F;
  for (size_t element_count = 1; element_count <= 111; ++element_count) {
    VUnaryMicrokernelTester().batch_size(element_count).Test(
      xnn_f32_velu_ukernel__avx512f_rr1_p6_x112,
      xnn_init_f32_elu_avx512_rr1_p6_params);
  }
}
10005 
TEST(F32_VELU__AVX512F_RR1_P6_X112, batch_gt_112) {
  // Batch sizes 113 through 223: 112 plus a remainder of 1..111 elements.
  TEST_REQUIRES_X86_AVX512F;
  for (size_t remainder = 1; remainder < 112; remainder++) {
    const size_t element_count = 112 + remainder;
    VUnaryMicrokernelTester().batch_size(element_count).Test(
      xnn_f32_velu_ukernel__avx512f_rr1_p6_x112,
      xnn_init_f32_elu_avx512_rr1_p6_params);
  }
}
10014 
TEST(F32_VELU__AVX512F_RR1_P6_X112, inplace) {
  // Runs with the tester's inplace flag set for batch sizes
  // 1, 112, 223, 334, 445 and 556.
  TEST_REQUIRES_X86_AVX512F;
  size_t element_count = 1;
  while (element_count <= 560) {
    VUnaryMicrokernelTester().batch_size(element_count).inplace(true).Test(
      xnn_f32_velu_ukernel__avx512f_rr1_p6_x112,
      xnn_init_f32_elu_avx512_rr1_p6_params);
    element_count += 111;
  }
}
10024 
TEST(F32_VELU__AVX512F_RR1_P6_X112, prescale) {
  // Sweeps prescale over 0.1f and 10.0f; for each value runs batch sizes
  // 1, 112, 223, 334, 445 and 556.
  TEST_REQUIRES_X86_AVX512F;
  const std::vector<float> prescale_values({0.1f, 10.0f});
  for (const float prescale : prescale_values) {
    for (size_t element_count = 1; element_count <= 560; element_count += 111) {
      VUnaryMicrokernelTester().batch_size(element_count).prescale(prescale).Test(
        xnn_f32_velu_ukernel__avx512f_rr1_p6_x112,
        xnn_init_f32_elu_avx512_rr1_p6_params);
    }
  }
}
10036 
TEST(F32_VELU__AVX512F_RR1_P6_X112, alpha) {
  // Sweeps alpha over 0.3f and 3.0f; for each value runs batch sizes
  // 1, 112, 223, 334, 445 and 556.
  TEST_REQUIRES_X86_AVX512F;
  const std::vector<float> alpha_values({0.3f, 3.0f});
  for (const float alpha : alpha_values) {
    for (size_t element_count = 1; element_count <= 560; element_count += 111) {
      VUnaryMicrokernelTester().batch_size(element_count).alpha(alpha).Test(
        xnn_f32_velu_ukernel__avx512f_rr1_p6_x112,
        xnn_init_f32_elu_avx512_rr1_p6_params);
    }
  }
}
10048 
TEST(F32_VELU__AVX512F_RR1_P6_X112, beta) {
  // Sweeps beta over 0.3f and 3.0f; for each value runs batch sizes
  // 1, 112, 223, 334, 445 and 556.
  TEST_REQUIRES_X86_AVX512F;
  const std::vector<float> beta_values({0.3f, 3.0f});
  for (const float beta : beta_values) {
    for (size_t element_count = 1; element_count <= 560; element_count += 111) {
      VUnaryMicrokernelTester().batch_size(element_count).beta(beta).Test(
        xnn_f32_velu_ukernel__avx512f_rr1_p6_x112,
        xnn_init_f32_elu_avx512_rr1_p6_params);
    }
  }
}
10060 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
10061 
10062 
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  // Test suite for xnn_f32_velu_ukernel__avx512f_rr1_p6_x128, always paired
  // with xnn_init_f32_elu_avx512_rr1_p6_params.

  // Single run with a batch size of exactly 128.
  TEST(F32_VELU__AVX512F_RR1_P6_X128, batch_eq_128) {
    TEST_REQUIRES_X86_AVX512F;
    VUnaryMicrokernelTester().batch_size(128).Test(
        xnn_f32_velu_ukernel__avx512f_rr1_p6_x128,
        xnn_init_f32_elu_avx512_rr1_p6_params);
  }

  // Batch sizes that are multiples of 128, from 256 up to (not including) 1280.
  TEST(F32_VELU__AVX512F_RR1_P6_X128, batch_div_128) {
    TEST_REQUIRES_X86_AVX512F;
    for (size_t n = 256; n < 1280; n += 128) {
      VUnaryMicrokernelTester().batch_size(n).Test(
          xnn_f32_velu_ukernel__avx512f_rr1_p6_x128,
          xnn_init_f32_elu_avx512_rr1_p6_params);
    }
  }

  // Every batch size from 1 through 127.
  TEST(F32_VELU__AVX512F_RR1_P6_X128, batch_lt_128) {
    TEST_REQUIRES_X86_AVX512F;
    for (size_t n = 1; n < 128; ++n) {
      VUnaryMicrokernelTester().batch_size(n).Test(
          xnn_f32_velu_ukernel__avx512f_rr1_p6_x128,
          xnn_init_f32_elu_avx512_rr1_p6_params);
    }
  }

  // Every batch size from 129 through 255.
  TEST(F32_VELU__AVX512F_RR1_P6_X128, batch_gt_128) {
    TEST_REQUIRES_X86_AVX512F;
    for (size_t n = 129; n < 256; ++n) {
      VUnaryMicrokernelTester().batch_size(n).Test(
          xnn_f32_velu_ukernel__avx512f_rr1_p6_x128,
          xnn_init_f32_elu_avx512_rr1_p6_params);
    }
  }

  // inplace(true) for batch sizes from 1 through 640 in steps of 127.
  TEST(F32_VELU__AVX512F_RR1_P6_X128, inplace) {
    TEST_REQUIRES_X86_AVX512F;
    for (size_t n = 1; n <= 640; n += 127) {
      VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
          xnn_f32_velu_ukernel__avx512f_rr1_p6_x128,
          xnn_init_f32_elu_avx512_rr1_p6_params);
    }
  }

  // prescale values 0.1 and 10.0, each over batch sizes 1..640 in steps of 127.
  TEST(F32_VELU__AVX512F_RR1_P6_X128, prescale) {
    TEST_REQUIRES_X86_AVX512F;
    const std::vector<float> prescales{0.1f, 10.0f};
    for (const float value : prescales) {
      for (size_t n = 1; n <= 640; n += 127) {
        VUnaryMicrokernelTester().batch_size(n).prescale(value).Test(
            xnn_f32_velu_ukernel__avx512f_rr1_p6_x128,
            xnn_init_f32_elu_avx512_rr1_p6_params);
      }
    }
  }

  // alpha values 0.3 and 3.0, each over batch sizes 1..640 in steps of 127.
  TEST(F32_VELU__AVX512F_RR1_P6_X128, alpha) {
    TEST_REQUIRES_X86_AVX512F;
    const std::vector<float> alphas{0.3f, 3.0f};
    for (const float value : alphas) {
      for (size_t n = 1; n <= 640; n += 127) {
        VUnaryMicrokernelTester().batch_size(n).alpha(value).Test(
            xnn_f32_velu_ukernel__avx512f_rr1_p6_x128,
            xnn_init_f32_elu_avx512_rr1_p6_params);
      }
    }
  }

  // beta values 0.3 and 3.0, each over batch sizes 1..640 in steps of 127.
  TEST(F32_VELU__AVX512F_RR1_P6_X128, beta) {
    TEST_REQUIRES_X86_AVX512F;
    const std::vector<float> betas{0.3f, 3.0f};
    for (const float value : betas) {
      for (size_t n = 1; n <= 640; n += 127) {
        VUnaryMicrokernelTester().batch_size(n).beta(value).Test(
            xnn_f32_velu_ukernel__avx512f_rr1_p6_x128,
            xnn_init_f32_elu_avx512_rr1_p6_params);
      }
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
10144 
10145 
#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
  // Test suite for xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x4, always
  // paired with xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params.

  // Single run with a batch size of exactly 4.
  TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X4, batch_eq_4) {
    VUnaryMicrokernelTester().batch_size(4).Test(
        xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x4,
        xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
  }

  // Batch sizes that are multiples of 4, from 8 up to (not including) 40.
  TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X4, batch_div_4) {
    for (size_t n = 8; n < 40; n += 4) {
      VUnaryMicrokernelTester().batch_size(n).Test(
          xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x4,
          xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
    }
  }

  // Every batch size from 1 through 3.
  TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X4, batch_lt_4) {
    for (size_t n = 1; n < 4; ++n) {
      VUnaryMicrokernelTester().batch_size(n).Test(
          xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x4,
          xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
    }
  }

  // Every batch size from 5 through 7.
  TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X4, batch_gt_4) {
    for (size_t n = 5; n < 8; ++n) {
      VUnaryMicrokernelTester().batch_size(n).Test(
          xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x4,
          xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
    }
  }

  // inplace(true) for batch sizes from 1 through 20 in steps of 3.
  TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X4, inplace) {
    for (size_t n = 1; n <= 20; n += 3) {
      VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
          xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x4,
          xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
    }
  }

  // prescale values 0.1 and 10.0, each over batch sizes 1..20 in steps of 3.
  TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X4, prescale) {
    const std::vector<float> prescales{0.1f, 10.0f};
    for (const float value : prescales) {
      for (size_t n = 1; n <= 20; n += 3) {
        VUnaryMicrokernelTester().batch_size(n).prescale(value).Test(
            xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x4,
            xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
      }
    }
  }

  // alpha values 0.3 and 3.0, each over batch sizes 1..20 in steps of 3.
  TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X4, alpha) {
    const std::vector<float> alphas{0.3f, 3.0f};
    for (const float value : alphas) {
      for (size_t n = 1; n <= 20; n += 3) {
        VUnaryMicrokernelTester().batch_size(n).alpha(value).Test(
            xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x4,
            xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
      }
    }
  }

  // beta values 0.3 and 3.0, each over batch sizes 1..20 in steps of 3.
  TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X4, beta) {
    const std::vector<float> betas{0.3f, 3.0f};
    for (const float value : betas) {
      for (size_t n = 1; n <= 20; n += 3) {
        VUnaryMicrokernelTester().batch_size(n).beta(value).Test(
            xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x4,
            xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
      }
    }
  }
#endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
10219 
10220 
#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
  // Test suite for xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x8, always
  // paired with xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params.

  // Single run with a batch size of exactly 8.
  TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X8, batch_eq_8) {
    VUnaryMicrokernelTester().batch_size(8).Test(
        xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x8,
        xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
  }

  // Batch sizes that are multiples of 8, from 16 up to (not including) 80.
  TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X8, batch_div_8) {
    for (size_t n = 16; n < 80; n += 8) {
      VUnaryMicrokernelTester().batch_size(n).Test(
          xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x8,
          xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
    }
  }

  // Every batch size from 1 through 7.
  TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X8, batch_lt_8) {
    for (size_t n = 1; n < 8; ++n) {
      VUnaryMicrokernelTester().batch_size(n).Test(
          xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x8,
          xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
    }
  }

  // Every batch size from 9 through 15.
  TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X8, batch_gt_8) {
    for (size_t n = 9; n < 16; ++n) {
      VUnaryMicrokernelTester().batch_size(n).Test(
          xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x8,
          xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
    }
  }

  // inplace(true) for batch sizes from 1 through 40 in steps of 7.
  TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X8, inplace) {
    for (size_t n = 1; n <= 40; n += 7) {
      VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
          xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x8,
          xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
    }
  }

  // prescale values 0.1 and 10.0, each over batch sizes 1..40 in steps of 7.
  TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X8, prescale) {
    const std::vector<float> prescales{0.1f, 10.0f};
    for (const float value : prescales) {
      for (size_t n = 1; n <= 40; n += 7) {
        VUnaryMicrokernelTester().batch_size(n).prescale(value).Test(
            xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x8,
            xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
      }
    }
  }

  // alpha values 0.3 and 3.0, each over batch sizes 1..40 in steps of 7.
  TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X8, alpha) {
    const std::vector<float> alphas{0.3f, 3.0f};
    for (const float value : alphas) {
      for (size_t n = 1; n <= 40; n += 7) {
        VUnaryMicrokernelTester().batch_size(n).alpha(value).Test(
            xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x8,
            xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
      }
    }
  }

  // beta values 0.3 and 3.0, each over batch sizes 1..40 in steps of 7.
  TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X8, beta) {
    const std::vector<float> betas{0.3f, 3.0f};
    for (const float value : betas) {
      for (size_t n = 1; n <= 40; n += 7) {
        VUnaryMicrokernelTester().batch_size(n).beta(value).Test(
            xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x8,
            xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
      }
    }
  }
#endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
10294 
10295 
#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
  // Test suite for xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x12, always
  // paired with xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params.

  // Single run with a batch size of exactly 12.
  TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X12, batch_eq_12) {
    VUnaryMicrokernelTester().batch_size(12).Test(
        xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x12,
        xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
  }

  // Batch sizes that are multiples of 12, from 24 up to (not including) 120.
  TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X12, batch_div_12) {
    for (size_t n = 24; n < 120; n += 12) {
      VUnaryMicrokernelTester().batch_size(n).Test(
          xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x12,
          xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
    }
  }

  // Every batch size from 1 through 11.
  TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X12, batch_lt_12) {
    for (size_t n = 1; n < 12; ++n) {
      VUnaryMicrokernelTester().batch_size(n).Test(
          xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x12,
          xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
    }
  }

  // Every batch size from 13 through 23.
  TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X12, batch_gt_12) {
    for (size_t n = 13; n < 24; ++n) {
      VUnaryMicrokernelTester().batch_size(n).Test(
          xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x12,
          xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
    }
  }

  // inplace(true) for batch sizes from 1 through 60 in steps of 11.
  TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X12, inplace) {
    for (size_t n = 1; n <= 60; n += 11) {
      VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
          xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x12,
          xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
    }
  }

  // prescale values 0.1 and 10.0, each over batch sizes 1..60 in steps of 11.
  TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X12, prescale) {
    const std::vector<float> prescales{0.1f, 10.0f};
    for (const float value : prescales) {
      for (size_t n = 1; n <= 60; n += 11) {
        VUnaryMicrokernelTester().batch_size(n).prescale(value).Test(
            xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x12,
            xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
      }
    }
  }

  // alpha values 0.3 and 3.0, each over batch sizes 1..60 in steps of 11.
  TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X12, alpha) {
    const std::vector<float> alphas{0.3f, 3.0f};
    for (const float value : alphas) {
      for (size_t n = 1; n <= 60; n += 11) {
        VUnaryMicrokernelTester().batch_size(n).alpha(value).Test(
            xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x12,
            xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
      }
    }
  }

  // beta values 0.3 and 3.0, each over batch sizes 1..60 in steps of 11.
  TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X12, beta) {
    const std::vector<float> betas{0.3f, 3.0f};
    for (const float value : betas) {
      for (size_t n = 1; n <= 60; n += 11) {
        VUnaryMicrokernelTester().batch_size(n).beta(value).Test(
            xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x12,
            xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
      }
    }
  }
#endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
10369 
10370 
#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
  // Test suite for xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x16, always
  // paired with xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params.

  // Single run with a batch size of exactly 16.
  TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X16, batch_eq_16) {
    VUnaryMicrokernelTester().batch_size(16).Test(
        xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x16,
        xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
  }

  // Batch sizes that are multiples of 16, from 32 up to (not including) 160.
  TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X16, batch_div_16) {
    for (size_t n = 32; n < 160; n += 16) {
      VUnaryMicrokernelTester().batch_size(n).Test(
          xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x16,
          xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
    }
  }

  // Every batch size from 1 through 15.
  TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X16, batch_lt_16) {
    for (size_t n = 1; n < 16; ++n) {
      VUnaryMicrokernelTester().batch_size(n).Test(
          xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x16,
          xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
    }
  }

  // Every batch size from 17 through 31.
  TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X16, batch_gt_16) {
    for (size_t n = 17; n < 32; ++n) {
      VUnaryMicrokernelTester().batch_size(n).Test(
          xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x16,
          xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
    }
  }

  // inplace(true) for batch sizes from 1 through 80 in steps of 15.
  TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X16, inplace) {
    for (size_t n = 1; n <= 80; n += 15) {
      VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
          xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x16,
          xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
    }
  }

  // prescale values 0.1 and 10.0, each over batch sizes 1..80 in steps of 15.
  TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X16, prescale) {
    const std::vector<float> prescales{0.1f, 10.0f};
    for (const float value : prescales) {
      for (size_t n = 1; n <= 80; n += 15) {
        VUnaryMicrokernelTester().batch_size(n).prescale(value).Test(
            xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x16,
            xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
      }
    }
  }

  // alpha values 0.3 and 3.0, each over batch sizes 1..80 in steps of 15.
  TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X16, alpha) {
    const std::vector<float> alphas{0.3f, 3.0f};
    for (const float value : alphas) {
      for (size_t n = 1; n <= 80; n += 15) {
        VUnaryMicrokernelTester().batch_size(n).alpha(value).Test(
            xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x16,
            xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
      }
    }
  }

  // beta values 0.3 and 3.0, each over batch sizes 1..80 in steps of 15.
  TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X16, beta) {
    const std::vector<float> betas{0.3f, 3.0f};
    for (const float value : betas) {
      for (size_t n = 1; n <= 80; n += 15) {
        VUnaryMicrokernelTester().batch_size(n).beta(value).Test(
            xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x16,
            xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
      }
    }
  }
#endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
10444 
10445 
#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
  // Test suite for xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x20, always
  // paired with xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params.

  // Single run with a batch size of exactly 20.
  TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X20, batch_eq_20) {
    VUnaryMicrokernelTester().batch_size(20).Test(
        xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x20,
        xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
  }

  // Batch sizes that are multiples of 20, from 40 up to (not including) 200.
  TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X20, batch_div_20) {
    for (size_t n = 40; n < 200; n += 20) {
      VUnaryMicrokernelTester().batch_size(n).Test(
          xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x20,
          xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
    }
  }

  // Every batch size from 1 through 19.
  TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X20, batch_lt_20) {
    for (size_t n = 1; n < 20; ++n) {
      VUnaryMicrokernelTester().batch_size(n).Test(
          xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x20,
          xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
    }
  }

  // Every batch size from 21 through 39.
  TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X20, batch_gt_20) {
    for (size_t n = 21; n < 40; ++n) {
      VUnaryMicrokernelTester().batch_size(n).Test(
          xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x20,
          xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
    }
  }

  // inplace(true) for batch sizes from 1 through 100 in steps of 19.
  TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X20, inplace) {
    for (size_t n = 1; n <= 100; n += 19) {
      VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
          xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x20,
          xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
    }
  }

  // prescale values 0.1 and 10.0, each over batch sizes 1..100 in steps of 19.
  TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X20, prescale) {
    const std::vector<float> prescales{0.1f, 10.0f};
    for (const float value : prescales) {
      for (size_t n = 1; n <= 100; n += 19) {
        VUnaryMicrokernelTester().batch_size(n).prescale(value).Test(
            xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x20,
            xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
      }
    }
  }

  // alpha values 0.3 and 3.0, each over batch sizes 1..100 in steps of 19.
  TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X20, alpha) {
    const std::vector<float> alphas{0.3f, 3.0f};
    for (const float value : alphas) {
      for (size_t n = 1; n <= 100; n += 19) {
        VUnaryMicrokernelTester().batch_size(n).alpha(value).Test(
            xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x20,
            xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
      }
    }
  }

  // beta values 0.3 and 3.0, each over batch sizes 1..100 in steps of 19.
  TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X20, beta) {
    const std::vector<float> betas{0.3f, 3.0f};
    for (const float value : betas) {
      for (size_t n = 1; n <= 100; n += 19) {
        VUnaryMicrokernelTester().batch_size(n).beta(value).Test(
            xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x20,
            xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
      }
    }
  }
#endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
10519 
10520 
#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
  // Test suite for xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x24, always
  // paired with xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params.

  // Single run with a batch size of exactly 24.
  TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X24, batch_eq_24) {
    VUnaryMicrokernelTester().batch_size(24).Test(
        xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x24,
        xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
  }

  // Batch sizes that are multiples of 24, from 48 up to (not including) 240.
  TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X24, batch_div_24) {
    for (size_t n = 48; n < 240; n += 24) {
      VUnaryMicrokernelTester().batch_size(n).Test(
          xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x24,
          xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
    }
  }

  // Every batch size from 1 through 23.
  TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X24, batch_lt_24) {
    for (size_t n = 1; n < 24; ++n) {
      VUnaryMicrokernelTester().batch_size(n).Test(
          xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x24,
          xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
    }
  }

  // Every batch size from 25 through 47.
  TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X24, batch_gt_24) {
    for (size_t n = 25; n < 48; ++n) {
      VUnaryMicrokernelTester().batch_size(n).Test(
          xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x24,
          xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
    }
  }

  // inplace(true) for batch sizes from 1 through 120 in steps of 23.
  TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X24, inplace) {
    for (size_t n = 1; n <= 120; n += 23) {
      VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
          xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x24,
          xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
    }
  }

  // prescale values 0.1 and 10.0, each over batch sizes 1..120 in steps of 23.
  TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X24, prescale) {
    const std::vector<float> prescales{0.1f, 10.0f};
    for (const float value : prescales) {
      for (size_t n = 1; n <= 120; n += 23) {
        VUnaryMicrokernelTester().batch_size(n).prescale(value).Test(
            xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x24,
            xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
      }
    }
  }

  // alpha values 0.3 and 3.0, each over batch sizes 1..120 in steps of 23.
  TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X24, alpha) {
    const std::vector<float> alphas{0.3f, 3.0f};
    for (const float value : alphas) {
      for (size_t n = 1; n <= 120; n += 23) {
        VUnaryMicrokernelTester().batch_size(n).alpha(value).Test(
            xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x24,
            xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
      }
    }
  }

  // beta values 0.3 and 3.0, each over batch sizes 1..120 in steps of 23.
  TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X24, beta) {
    const std::vector<float> betas{0.3f, 3.0f};
    for (const float value : betas) {
      for (size_t n = 1; n <= 120; n += 23) {
        VUnaryMicrokernelTester().batch_size(n).beta(value).Test(
            xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x24,
            xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
      }
    }
  }
#endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
10594 
10595 
#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
  // Test suite for xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x4, always paired
  // with xnn_init_f32_elu_wasmsimd_rr2_p6_params.

  // Single run with a batch size of exactly 4.
  TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X4, batch_eq_4) {
    VUnaryMicrokernelTester().batch_size(4).Test(
        xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x4,
        xnn_init_f32_elu_wasmsimd_rr2_p6_params);
  }

  // Batch sizes that are multiples of 4, from 8 up to (not including) 40.
  TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X4, batch_div_4) {
    for (size_t n = 8; n < 40; n += 4) {
      VUnaryMicrokernelTester().batch_size(n).Test(
          xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x4,
          xnn_init_f32_elu_wasmsimd_rr2_p6_params);
    }
  }

  // Every batch size from 1 through 3.
  TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X4, batch_lt_4) {
    for (size_t n = 1; n < 4; ++n) {
      VUnaryMicrokernelTester().batch_size(n).Test(
          xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x4,
          xnn_init_f32_elu_wasmsimd_rr2_p6_params);
    }
  }

  // Every batch size from 5 through 7.
  TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X4, batch_gt_4) {
    for (size_t n = 5; n < 8; ++n) {
      VUnaryMicrokernelTester().batch_size(n).Test(
          xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x4,
          xnn_init_f32_elu_wasmsimd_rr2_p6_params);
    }
  }

  // inplace(true) for batch sizes from 1 through 20 in steps of 3.
  TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X4, inplace) {
    for (size_t n = 1; n <= 20; n += 3) {
      VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
          xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x4,
          xnn_init_f32_elu_wasmsimd_rr2_p6_params);
    }
  }

  // prescale values 0.1 and 10.0, each over batch sizes 1..20 in steps of 3.
  TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X4, prescale) {
    const std::vector<float> prescales{0.1f, 10.0f};
    for (const float value : prescales) {
      for (size_t n = 1; n <= 20; n += 3) {
        VUnaryMicrokernelTester().batch_size(n).prescale(value).Test(
            xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x4,
            xnn_init_f32_elu_wasmsimd_rr2_p6_params);
      }
    }
  }

  // alpha values 0.3 and 3.0, each over batch sizes 1..20 in steps of 3.
  TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X4, alpha) {
    const std::vector<float> alphas{0.3f, 3.0f};
    for (const float value : alphas) {
      for (size_t n = 1; n <= 20; n += 3) {
        VUnaryMicrokernelTester().batch_size(n).alpha(value).Test(
            xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x4,
            xnn_init_f32_elu_wasmsimd_rr2_p6_params);
      }
    }
  }

  // beta values 0.3 and 3.0, each over batch sizes 1..20 in steps of 3.
  TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X4, beta) {
    const std::vector<float> betas{0.3f, 3.0f};
    for (const float value : betas) {
      for (size_t n = 1; n <= 20; n += 3) {
        VUnaryMicrokernelTester().batch_size(n).beta(value).Test(
            xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x4,
            xnn_init_f32_elu_wasmsimd_rr2_p6_params);
      }
    }
  }
#endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
10669 
10670 
10671 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X8,batch_eq_8)10672   TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X8, batch_eq_8) {
10673     VUnaryMicrokernelTester()
10674       .batch_size(8)
10675       .Test(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x8, xnn_init_f32_elu_wasmsimd_rr2_p6_params);
10676   }
10677 
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X8,batch_div_8)10678   TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X8, batch_div_8) {
10679     for (size_t batch_size = 16; batch_size < 80; batch_size += 8) {
10680       VUnaryMicrokernelTester()
10681         .batch_size(batch_size)
10682         .Test(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x8, xnn_init_f32_elu_wasmsimd_rr2_p6_params);
10683     }
10684   }
10685 
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X8,batch_lt_8)10686   TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X8, batch_lt_8) {
10687     for (size_t batch_size = 1; batch_size < 8; batch_size++) {
10688       VUnaryMicrokernelTester()
10689         .batch_size(batch_size)
10690         .Test(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x8, xnn_init_f32_elu_wasmsimd_rr2_p6_params);
10691     }
10692   }
10693 
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X8,batch_gt_8)10694   TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X8, batch_gt_8) {
10695     for (size_t batch_size = 9; batch_size < 16; batch_size++) {
10696       VUnaryMicrokernelTester()
10697         .batch_size(batch_size)
10698         .Test(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x8, xnn_init_f32_elu_wasmsimd_rr2_p6_params);
10699     }
10700   }
10701 
// Batch sizes 1 up to 40 in steps of 7, with inplace(true) set on the tester.
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X8, inplace) {
  const auto ukernel = xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x8;
  const auto init_params = xnn_init_f32_elu_wasmsimd_rr2_p6_params;
  for (size_t n = 1; n <= 40; n += 7) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(ukernel, init_params);
  }
}
10710 
// Crosses prescale values 0.1 and 10 with batch sizes 1 up to 40 in steps of 7.
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X8, prescale) {
  const auto ukernel = xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x8;
  const auto init_params = xnn_init_f32_elu_wasmsimd_rr2_p6_params;
  const std::vector<float> prescales{0.1f, 10.0f};
  for (const float scale : prescales) {
    for (size_t n = 1; n <= 40; n += 7) {
      VUnaryMicrokernelTester().batch_size(n).prescale(scale).Test(ukernel, init_params);
    }
  }
}
10721 
// Crosses alpha values 0.3 and 3 with batch sizes 1 up to 40 in steps of 7.
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X8, alpha) {
  const auto ukernel = xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x8;
  const auto init_params = xnn_init_f32_elu_wasmsimd_rr2_p6_params;
  const std::vector<float> alphas{0.3f, 3.0f};
  for (const float a : alphas) {
    for (size_t n = 1; n <= 40; n += 7) {
      VUnaryMicrokernelTester().batch_size(n).alpha(a).Test(ukernel, init_params);
    }
  }
}
10732 
// Crosses beta values 0.3 and 3 with batch sizes 1 up to 40 in steps of 7.
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X8, beta) {
  const auto ukernel = xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x8;
  const auto init_params = xnn_init_f32_elu_wasmsimd_rr2_p6_params;
  const std::vector<float> betas{0.3f, 3.0f};
  for (const float b : betas) {
    for (size_t n = 1; n <= 40; n += 7) {
      VUnaryMicrokernelTester().batch_size(n).beta(b).Test(ukernel, init_params);
    }
  }
}
10743 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
10744 
10745 
10746 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run with batch_size fixed at 12.
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X12, batch_eq_12) {
  const auto ukernel = xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x12;
  const auto init_params = xnn_init_f32_elu_wasmsimd_rr2_p6_params;
  VUnaryMicrokernelTester().batch_size(12).Test(ukernel, init_params);
}
10752 
// Batch sizes that are multiples of 12: 24, 36, ..., 108.
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X12, batch_div_12) {
  const auto ukernel = xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x12;
  const auto init_params = xnn_init_f32_elu_wasmsimd_rr2_p6_params;
  for (size_t n = 24; n < 120; n += 12) {
    VUnaryMicrokernelTester().batch_size(n).Test(ukernel, init_params);
  }
}
10760 
// Every batch size below 12 (1 through 11).
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X12, batch_lt_12) {
  const auto ukernel = xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x12;
  const auto init_params = xnn_init_f32_elu_wasmsimd_rr2_p6_params;
  for (size_t n = 1; n < 12; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(ukernel, init_params);
  }
}
10768 
// Every batch size strictly between 12 and 24 (13 through 23).
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X12, batch_gt_12) {
  const auto ukernel = xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x12;
  const auto init_params = xnn_init_f32_elu_wasmsimd_rr2_p6_params;
  for (size_t n = 13; n < 24; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(ukernel, init_params);
  }
}
10776 
// Batch sizes 1 up to 60 in steps of 11, with inplace(true) set on the tester.
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X12, inplace) {
  const auto ukernel = xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x12;
  const auto init_params = xnn_init_f32_elu_wasmsimd_rr2_p6_params;
  for (size_t n = 1; n <= 60; n += 11) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(ukernel, init_params);
  }
}
10785 
// Crosses prescale values 0.1 and 10 with batch sizes 1 up to 60 in steps of 11.
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X12, prescale) {
  const auto ukernel = xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x12;
  const auto init_params = xnn_init_f32_elu_wasmsimd_rr2_p6_params;
  const std::vector<float> prescales{0.1f, 10.0f};
  for (const float scale : prescales) {
    for (size_t n = 1; n <= 60; n += 11) {
      VUnaryMicrokernelTester().batch_size(n).prescale(scale).Test(ukernel, init_params);
    }
  }
}
10796 
// Crosses alpha values 0.3 and 3 with batch sizes 1 up to 60 in steps of 11.
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X12, alpha) {
  const auto ukernel = xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x12;
  const auto init_params = xnn_init_f32_elu_wasmsimd_rr2_p6_params;
  const std::vector<float> alphas{0.3f, 3.0f};
  for (const float a : alphas) {
    for (size_t n = 1; n <= 60; n += 11) {
      VUnaryMicrokernelTester().batch_size(n).alpha(a).Test(ukernel, init_params);
    }
  }
}
10807 
// Crosses beta values 0.3 and 3 with batch sizes 1 up to 60 in steps of 11.
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X12, beta) {
  const auto ukernel = xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x12;
  const auto init_params = xnn_init_f32_elu_wasmsimd_rr2_p6_params;
  const std::vector<float> betas{0.3f, 3.0f};
  for (const float b : betas) {
    for (size_t n = 1; n <= 60; n += 11) {
      VUnaryMicrokernelTester().batch_size(n).beta(b).Test(ukernel, init_params);
    }
  }
}
10818 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
10819 
10820 
10821 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run with batch_size fixed at 16.
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X16, batch_eq_16) {
  const auto ukernel = xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x16;
  const auto init_params = xnn_init_f32_elu_wasmsimd_rr2_p6_params;
  VUnaryMicrokernelTester().batch_size(16).Test(ukernel, init_params);
}
10827 
// Batch sizes that are multiples of 16: 32, 48, ..., 144.
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X16, batch_div_16) {
  const auto ukernel = xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x16;
  const auto init_params = xnn_init_f32_elu_wasmsimd_rr2_p6_params;
  for (size_t n = 32; n < 160; n += 16) {
    VUnaryMicrokernelTester().batch_size(n).Test(ukernel, init_params);
  }
}
10835 
// Every batch size below 16 (1 through 15).
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X16, batch_lt_16) {
  const auto ukernel = xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x16;
  const auto init_params = xnn_init_f32_elu_wasmsimd_rr2_p6_params;
  for (size_t n = 1; n < 16; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(ukernel, init_params);
  }
}
10843 
// Every batch size strictly between 16 and 32 (17 through 31).
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X16, batch_gt_16) {
  const auto ukernel = xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x16;
  const auto init_params = xnn_init_f32_elu_wasmsimd_rr2_p6_params;
  for (size_t n = 17; n < 32; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(ukernel, init_params);
  }
}
10851 
// Batch sizes 1 up to 80 in steps of 15, with inplace(true) set on the tester.
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X16, inplace) {
  const auto ukernel = xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x16;
  const auto init_params = xnn_init_f32_elu_wasmsimd_rr2_p6_params;
  for (size_t n = 1; n <= 80; n += 15) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(ukernel, init_params);
  }
}
10860 
// Crosses prescale values 0.1 and 10 with batch sizes 1 up to 80 in steps of 15.
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X16, prescale) {
  const auto ukernel = xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x16;
  const auto init_params = xnn_init_f32_elu_wasmsimd_rr2_p6_params;
  const std::vector<float> prescales{0.1f, 10.0f};
  for (const float scale : prescales) {
    for (size_t n = 1; n <= 80; n += 15) {
      VUnaryMicrokernelTester().batch_size(n).prescale(scale).Test(ukernel, init_params);
    }
  }
}
10871 
// Crosses alpha values 0.3 and 3 with batch sizes 1 up to 80 in steps of 15.
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X16, alpha) {
  const auto ukernel = xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x16;
  const auto init_params = xnn_init_f32_elu_wasmsimd_rr2_p6_params;
  const std::vector<float> alphas{0.3f, 3.0f};
  for (const float a : alphas) {
    for (size_t n = 1; n <= 80; n += 15) {
      VUnaryMicrokernelTester().batch_size(n).alpha(a).Test(ukernel, init_params);
    }
  }
}
10882 
// Crosses beta values 0.3 and 3 with batch sizes 1 up to 80 in steps of 15.
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X16, beta) {
  const auto ukernel = xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x16;
  const auto init_params = xnn_init_f32_elu_wasmsimd_rr2_p6_params;
  const std::vector<float> betas{0.3f, 3.0f};
  for (const float b : betas) {
    for (size_t n = 1; n <= 80; n += 15) {
      VUnaryMicrokernelTester().batch_size(n).beta(b).Test(ukernel, init_params);
    }
  }
}
10893 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
10894 
10895 
10896 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run with batch_size fixed at 20.
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X20, batch_eq_20) {
  const auto ukernel = xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x20;
  const auto init_params = xnn_init_f32_elu_wasmsimd_rr2_p6_params;
  VUnaryMicrokernelTester().batch_size(20).Test(ukernel, init_params);
}
10902 
// Batch sizes that are multiples of 20: 40, 60, ..., 180.
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X20, batch_div_20) {
  const auto ukernel = xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x20;
  const auto init_params = xnn_init_f32_elu_wasmsimd_rr2_p6_params;
  for (size_t n = 40; n < 200; n += 20) {
    VUnaryMicrokernelTester().batch_size(n).Test(ukernel, init_params);
  }
}
10910 
// Every batch size below 20 (1 through 19).
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X20, batch_lt_20) {
  const auto ukernel = xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x20;
  const auto init_params = xnn_init_f32_elu_wasmsimd_rr2_p6_params;
  for (size_t n = 1; n < 20; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(ukernel, init_params);
  }
}
10918 
// Every batch size strictly between 20 and 40 (21 through 39).
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X20, batch_gt_20) {
  const auto ukernel = xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x20;
  const auto init_params = xnn_init_f32_elu_wasmsimd_rr2_p6_params;
  for (size_t n = 21; n < 40; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(ukernel, init_params);
  }
}
10926 
// Batch sizes 1 up to 100 in steps of 19, with inplace(true) set on the tester.
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X20, inplace) {
  const auto ukernel = xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x20;
  const auto init_params = xnn_init_f32_elu_wasmsimd_rr2_p6_params;
  for (size_t n = 1; n <= 100; n += 19) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(ukernel, init_params);
  }
}
10935 
// Crosses prescale values 0.1 and 10 with batch sizes 1 up to 100 in steps of 19.
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X20, prescale) {
  const auto ukernel = xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x20;
  const auto init_params = xnn_init_f32_elu_wasmsimd_rr2_p6_params;
  const std::vector<float> prescales{0.1f, 10.0f};
  for (const float scale : prescales) {
    for (size_t n = 1; n <= 100; n += 19) {
      VUnaryMicrokernelTester().batch_size(n).prescale(scale).Test(ukernel, init_params);
    }
  }
}
10946 
// Crosses alpha values 0.3 and 3 with batch sizes 1 up to 100 in steps of 19.
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X20, alpha) {
  const auto ukernel = xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x20;
  const auto init_params = xnn_init_f32_elu_wasmsimd_rr2_p6_params;
  const std::vector<float> alphas{0.3f, 3.0f};
  for (const float a : alphas) {
    for (size_t n = 1; n <= 100; n += 19) {
      VUnaryMicrokernelTester().batch_size(n).alpha(a).Test(ukernel, init_params);
    }
  }
}
10957 
// Crosses beta values 0.3 and 3 with batch sizes 1 up to 100 in steps of 19.
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X20, beta) {
  const auto ukernel = xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x20;
  const auto init_params = xnn_init_f32_elu_wasmsimd_rr2_p6_params;
  const std::vector<float> betas{0.3f, 3.0f};
  for (const float b : betas) {
    for (size_t n = 1; n <= 100; n += 19) {
      VUnaryMicrokernelTester().batch_size(n).beta(b).Test(ukernel, init_params);
    }
  }
}
10968 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
10969 
10970 
10971 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run with batch_size fixed at 24.
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X24, batch_eq_24) {
  const auto ukernel = xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x24;
  const auto init_params = xnn_init_f32_elu_wasmsimd_rr2_p6_params;
  VUnaryMicrokernelTester().batch_size(24).Test(ukernel, init_params);
}
10977 
// Batch sizes that are multiples of 24: 48, 72, ..., 216.
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X24, batch_div_24) {
  const auto ukernel = xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x24;
  const auto init_params = xnn_init_f32_elu_wasmsimd_rr2_p6_params;
  for (size_t n = 48; n < 240; n += 24) {
    VUnaryMicrokernelTester().batch_size(n).Test(ukernel, init_params);
  }
}
10985 
// Every batch size below 24 (1 through 23).
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X24, batch_lt_24) {
  const auto ukernel = xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x24;
  const auto init_params = xnn_init_f32_elu_wasmsimd_rr2_p6_params;
  for (size_t n = 1; n < 24; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(ukernel, init_params);
  }
}
10993 
// Every batch size strictly between 24 and 48 (25 through 47).
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X24, batch_gt_24) {
  const auto ukernel = xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x24;
  const auto init_params = xnn_init_f32_elu_wasmsimd_rr2_p6_params;
  for (size_t n = 25; n < 48; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(ukernel, init_params);
  }
}
11001 
// Batch sizes 1 up to 120 in steps of 23, with inplace(true) set on the tester.
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X24, inplace) {
  const auto ukernel = xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x24;
  const auto init_params = xnn_init_f32_elu_wasmsimd_rr2_p6_params;
  for (size_t n = 1; n <= 120; n += 23) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(ukernel, init_params);
  }
}
11010 
// Crosses prescale values 0.1 and 10 with batch sizes 1 up to 120 in steps of 23.
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X24, prescale) {
  const auto ukernel = xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x24;
  const auto init_params = xnn_init_f32_elu_wasmsimd_rr2_p6_params;
  const std::vector<float> prescales{0.1f, 10.0f};
  for (const float scale : prescales) {
    for (size_t n = 1; n <= 120; n += 23) {
      VUnaryMicrokernelTester().batch_size(n).prescale(scale).Test(ukernel, init_params);
    }
  }
}
11021 
// Crosses alpha values 0.3 and 3 with batch sizes 1 up to 120 in steps of 23.
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X24, alpha) {
  const auto ukernel = xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x24;
  const auto init_params = xnn_init_f32_elu_wasmsimd_rr2_p6_params;
  const std::vector<float> alphas{0.3f, 3.0f};
  for (const float a : alphas) {
    for (size_t n = 1; n <= 120; n += 23) {
      VUnaryMicrokernelTester().batch_size(n).alpha(a).Test(ukernel, init_params);
    }
  }
}
11032 
// Crosses beta values 0.3 and 3 with batch sizes 1 up to 120 in steps of 23.
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X24, beta) {
  const auto ukernel = xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x24;
  const auto init_params = xnn_init_f32_elu_wasmsimd_rr2_p6_params;
  const std::vector<float> betas{0.3f, 3.0f};
  for (const float b : betas) {
    for (size_t n = 1; n <= 120; n += 23) {
      VUnaryMicrokernelTester().batch_size(n).beta(b).Test(ukernel, init_params);
    }
  }
}
11043 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
11044 
11045 
11046 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run with batch_size fixed at 4.
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X4, batch_eq_4) {
  const auto ukernel = xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x4;
  const auto init_params = xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params;
  VUnaryMicrokernelTester().batch_size(4).Test(ukernel, init_params);
}
11052 
// Batch sizes that are multiples of 4: 8, 12, ..., 36.
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X4, batch_div_4) {
  const auto ukernel = xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x4;
  const auto init_params = xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params;
  for (size_t n = 8; n < 40; n += 4) {
    VUnaryMicrokernelTester().batch_size(n).Test(ukernel, init_params);
  }
}
11060 
// Every batch size below 4 (1 through 3).
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X4, batch_lt_4) {
  const auto ukernel = xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x4;
  const auto init_params = xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params;
  for (size_t n = 1; n < 4; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(ukernel, init_params);
  }
}
11068 
// Every batch size strictly between 4 and 8 (5 through 7).
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X4, batch_gt_4) {
  const auto ukernel = xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x4;
  const auto init_params = xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params;
  for (size_t n = 5; n < 8; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(ukernel, init_params);
  }
}
11076 
// Batch sizes 1 up to 20 in steps of 3, with inplace(true) set on the tester.
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X4, inplace) {
  const auto ukernel = xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x4;
  const auto init_params = xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params;
  for (size_t n = 1; n <= 20; n += 3) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(ukernel, init_params);
  }
}
11085 
// Crosses prescale values 0.1 and 10 with batch sizes 1 up to 20 in steps of 3.
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X4, prescale) {
  const auto ukernel = xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x4;
  const auto init_params = xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params;
  const std::vector<float> prescales{0.1f, 10.0f};
  for (const float scale : prescales) {
    for (size_t n = 1; n <= 20; n += 3) {
      VUnaryMicrokernelTester().batch_size(n).prescale(scale).Test(ukernel, init_params);
    }
  }
}
11096 
// Crosses alpha values 0.3 and 3 with batch sizes 1 up to 20 in steps of 3.
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X4, alpha) {
  const auto ukernel = xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x4;
  const auto init_params = xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params;
  const std::vector<float> alphas{0.3f, 3.0f};
  for (const float a : alphas) {
    for (size_t n = 1; n <= 20; n += 3) {
      VUnaryMicrokernelTester().batch_size(n).alpha(a).Test(ukernel, init_params);
    }
  }
}
11107 
// Crosses beta values 0.3 and 3 with batch sizes 1 up to 20 in steps of 3.
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X4, beta) {
  const auto ukernel = xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x4;
  const auto init_params = xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params;
  const std::vector<float> betas{0.3f, 3.0f};
  for (const float b : betas) {
    for (size_t n = 1; n <= 20; n += 3) {
      VUnaryMicrokernelTester().batch_size(n).beta(b).Test(ukernel, init_params);
    }
  }
}
11118 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
11119 
11120 
11121 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run with batch_size fixed at 8.
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X8, batch_eq_8) {
  const auto ukernel = xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x8;
  const auto init_params = xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params;
  VUnaryMicrokernelTester().batch_size(8).Test(ukernel, init_params);
}
11127 
// Batch sizes that are multiples of 8: 16, 24, ..., 72.
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X8, batch_div_8) {
  const auto ukernel = xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x8;
  const auto init_params = xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params;
  for (size_t n = 16; n < 80; n += 8) {
    VUnaryMicrokernelTester().batch_size(n).Test(ukernel, init_params);
  }
}
11135 
// Every batch size below 8 (1 through 7).
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X8, batch_lt_8) {
  const auto ukernel = xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x8;
  const auto init_params = xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params;
  for (size_t n = 1; n < 8; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(ukernel, init_params);
  }
}
11143 
// Every batch size strictly between 8 and 16 (9 through 15).
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X8, batch_gt_8) {
  const auto ukernel = xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x8;
  const auto init_params = xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params;
  for (size_t n = 9; n < 16; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(ukernel, init_params);
  }
}
11151 
// Batch sizes 1 up to 40 in steps of 7, with inplace(true) set on the tester.
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X8, inplace) {
  const auto ukernel = xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x8;
  const auto init_params = xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params;
  for (size_t n = 1; n <= 40; n += 7) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(ukernel, init_params);
  }
}
11160 
// Crosses prescale values 0.1 and 10 with batch sizes 1 up to 40 in steps of 7.
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X8, prescale) {
  const auto ukernel = xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x8;
  const auto init_params = xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params;
  const std::vector<float> prescales{0.1f, 10.0f};
  for (const float scale : prescales) {
    for (size_t n = 1; n <= 40; n += 7) {
      VUnaryMicrokernelTester().batch_size(n).prescale(scale).Test(ukernel, init_params);
    }
  }
}
11171 
// Crosses alpha values 0.3 and 3 with batch sizes 1 up to 40 in steps of 7.
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X8, alpha) {
  const auto ukernel = xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x8;
  const auto init_params = xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params;
  const std::vector<float> alphas{0.3f, 3.0f};
  for (const float a : alphas) {
    for (size_t n = 1; n <= 40; n += 7) {
      VUnaryMicrokernelTester().batch_size(n).alpha(a).Test(ukernel, init_params);
    }
  }
}
11182 
// Crosses beta values 0.3 and 3 with batch sizes 1 up to 40 in steps of 7.
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X8, beta) {
  const auto ukernel = xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x8;
  const auto init_params = xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params;
  const std::vector<float> betas{0.3f, 3.0f};
  for (const float b : betas) {
    for (size_t n = 1; n <= 40; n += 7) {
      VUnaryMicrokernelTester().batch_size(n).beta(b).Test(ukernel, init_params);
    }
  }
}
11193 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
11194 
11195 
11196 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run with batch_size fixed at 12.
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X12, batch_eq_12) {
  const auto ukernel = xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x12;
  const auto init_params = xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params;
  VUnaryMicrokernelTester().batch_size(12).Test(ukernel, init_params);
}
11202 
// Batch sizes that are multiples of 12: 24, 36, ..., 108.
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X12, batch_div_12) {
  const auto ukernel = xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x12;
  const auto init_params = xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params;
  for (size_t n = 24; n < 120; n += 12) {
    VUnaryMicrokernelTester().batch_size(n).Test(ukernel, init_params);
  }
}
11210 
// Every batch size below 12 (1 through 11).
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X12, batch_lt_12) {
  const auto ukernel = xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x12;
  const auto init_params = xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params;
  for (size_t n = 1; n < 12; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(ukernel, init_params);
  }
}
11218 
// Every batch size strictly between 12 and 24 (13 through 23).
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X12, batch_gt_12) {
  const auto ukernel = xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x12;
  const auto init_params = xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params;
  for (size_t n = 13; n < 24; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(ukernel, init_params);
  }
}
11226 
// Batch sizes 1 up to 60 in steps of 11, with inplace(true) set on the tester.
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X12, inplace) {
  const auto ukernel = xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x12;
  const auto init_params = xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params;
  for (size_t n = 1; n <= 60; n += 11) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(ukernel, init_params);
  }
}
11235 
// Crosses prescale values 0.1 and 10 with batch sizes 1 up to 60 in steps of 11.
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X12, prescale) {
  const auto ukernel = xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x12;
  const auto init_params = xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params;
  const std::vector<float> prescales{0.1f, 10.0f};
  for (const float scale : prescales) {
    for (size_t n = 1; n <= 60; n += 11) {
      VUnaryMicrokernelTester().batch_size(n).prescale(scale).Test(ukernel, init_params);
    }
  }
}
11246 
// Crosses alpha values 0.3 and 3 with batch sizes 1 up to 60 in steps of 11.
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X12, alpha) {
  const auto ukernel = xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x12;
  const auto init_params = xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params;
  const std::vector<float> alphas{0.3f, 3.0f};
  for (const float a : alphas) {
    for (size_t n = 1; n <= 60; n += 11) {
      VUnaryMicrokernelTester().batch_size(n).alpha(a).Test(ukernel, init_params);
    }
  }
}
11257 
// Crosses beta values 0.3 and 3 with batch sizes 1 up to 60 in steps of 11.
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X12, beta) {
  const auto ukernel = xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x12;
  const auto init_params = xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params;
  const std::vector<float> betas{0.3f, 3.0f};
  for (const float b : betas) {
    for (size_t n = 1; n <= 60; n += 11) {
      VUnaryMicrokernelTester().batch_size(n).beta(b).Test(ukernel, init_params);
    }
  }
}
11268 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
11269 
11270 
11271 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run with batch_size fixed at 16.
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X16, batch_eq_16) {
  const auto ukernel = xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x16;
  const auto init_params = xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params;
  VUnaryMicrokernelTester().batch_size(16).Test(ukernel, init_params);
}
11277 
// Batch sizes that are multiples of 16: 32, 48, ..., 144.
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X16, batch_div_16) {
  const auto ukernel = xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x16;
  const auto init_params = xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params;
  for (size_t n = 32; n < 160; n += 16) {
    VUnaryMicrokernelTester().batch_size(n).Test(ukernel, init_params);
  }
}
11285 
// Every batch size below 16 (1 through 15).
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X16, batch_lt_16) {
  const auto ukernel = xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x16;
  const auto init_params = xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params;
  for (size_t n = 1; n < 16; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(ukernel, init_params);
  }
}
11293 
// Every batch size strictly between 16 and 32 (17 through 31).
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X16, batch_gt_16) {
  const auto ukernel = xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x16;
  const auto init_params = xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params;
  for (size_t n = 17; n < 32; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(ukernel, init_params);
  }
}
11301 
// Batch sizes 1 up to 80 in steps of 15, with inplace(true) set on the tester.
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X16, inplace) {
  const auto ukernel = xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x16;
  const auto init_params = xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params;
  for (size_t n = 1; n <= 80; n += 15) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(ukernel, init_params);
  }
}
11310 
// Crosses prescale values 0.1 and 10 with batch sizes 1 up to 80 in steps of 15.
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X16, prescale) {
  const auto ukernel = xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x16;
  const auto init_params = xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params;
  const std::vector<float> prescales{0.1f, 10.0f};
  for (const float scale : prescales) {
    for (size_t n = 1; n <= 80; n += 15) {
      VUnaryMicrokernelTester().batch_size(n).prescale(scale).Test(ukernel, init_params);
    }
  }
}
11321 
// Crosses alpha values 0.3 and 3 with batch sizes 1 up to 80 in steps of 15.
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X16, alpha) {
  const auto ukernel = xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x16;
  const auto init_params = xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params;
  const std::vector<float> alphas{0.3f, 3.0f};
  for (const float a : alphas) {
    for (size_t n = 1; n <= 80; n += 15) {
      VUnaryMicrokernelTester().batch_size(n).alpha(a).Test(ukernel, init_params);
    }
  }
}
11332 
// Crosses beta values 0.3 and 3 with batch sizes 1 up to 80 in steps of 15.
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X16, beta) {
  const auto ukernel = xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x16;
  const auto init_params = xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params;
  const std::vector<float> betas{0.3f, 3.0f};
  for (const float b : betas) {
    for (size_t n = 1; n <= 80; n += 15) {
      VUnaryMicrokernelTester().batch_size(n).beta(b).Test(ukernel, init_params);
    }
  }
}
11343 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
11344 
11345 
11346 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run with batch_size fixed at 20.
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X20, batch_eq_20) {
  const auto ukernel = xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x20;
  const auto init_params = xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params;
  VUnaryMicrokernelTester().batch_size(20).Test(ukernel, init_params);
}
11352 
// Batch sizes that are multiples of 20: 40, 60, ..., 180.
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X20, batch_div_20) {
  const auto ukernel = xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x20;
  const auto init_params = xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params;
  for (size_t n = 40; n < 200; n += 20) {
    VUnaryMicrokernelTester().batch_size(n).Test(ukernel, init_params);
  }
}
11360 
// Every batch size below 20: 1 through 19.
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X20, batch_lt_20) {
  for (size_t n = 1; n < 20; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x20,
      xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
  }
}
11368 
// Batch sizes strictly between 20 and 40: 21 through 39.
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X20, batch_gt_20) {
  for (size_t n = 21; n < 40; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x20,
      xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
  }
}
11376 
// Enables the tester's in-place mode for batch sizes 1, 20, 39, 58, 77, 96.
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X20, inplace) {
  for (size_t n = 1; n <= 100; n += 19) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
      xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x20,
      xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
  }
}
11385 
// Sweeps the tester's prescale over {0.1, 10.0} for batch sizes 1, 20, 39, 58, 77, 96.
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X20, prescale) {
  const std::vector<float> prescale_values{0.1f, 10.0f};
  for (const float prescale : prescale_values) {
    for (size_t n = 1; n <= 100; n += 19) {
      VUnaryMicrokernelTester().batch_size(n).prescale(prescale).Test(
        xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x20,
        xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
    }
  }
}
11396 
// Sweeps the tester's alpha over {0.3, 3.0} for batch sizes 1, 20, 39, 58, 77, 96.
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X20, alpha) {
  const std::vector<float> alpha_values{0.3f, 3.0f};
  for (const float alpha : alpha_values) {
    for (size_t n = 1; n <= 100; n += 19) {
      VUnaryMicrokernelTester().batch_size(n).alpha(alpha).Test(
        xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x20,
        xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
    }
  }
}
11407 
// Sweeps the tester's beta over {0.3, 3.0} for batch sizes 1, 20, 39, 58, 77, 96.
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X20, beta) {
  const std::vector<float> beta_values{0.3f, 3.0f};
  for (const float beta : beta_values) {
    for (size_t n = 1; n <= 100; n += 19) {
      VUnaryMicrokernelTester().batch_size(n).beta(beta).Test(
        xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x20,
        xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
    }
  }
}
11418 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
11419 
11420 
11421 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Runs the x24 kernel once on a batch of exactly 24 elements.
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X24, batch_eq_24) {
  constexpr size_t kBatch = 24;
  VUnaryMicrokernelTester().batch_size(kBatch).Test(
    xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x24,
    xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
}
11427 
// Batch sizes that are multiples of 24: 48, 72, ..., 216.
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X24, batch_div_24) {
  for (size_t n = 48; n < 240; n += 24) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x24,
      xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
  }
}
11435 
// Every batch size below 24: 1 through 23.
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X24, batch_lt_24) {
  for (size_t n = 1; n < 24; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x24,
      xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
  }
}
11443 
// Batch sizes strictly between 24 and 48: 25 through 47.
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X24, batch_gt_24) {
  for (size_t n = 25; n < 48; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x24,
      xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
  }
}
11451 
// Enables the tester's in-place mode for batch sizes 1, 24, 47, 70, 93, 116.
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X24, inplace) {
  for (size_t n = 1; n <= 120; n += 23) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
      xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x24,
      xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
  }
}
11460 
// Sweeps the tester's prescale over {0.1, 10.0} for batch sizes 1, 24, 47, 70, 93, 116.
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X24, prescale) {
  const std::vector<float> prescale_values{0.1f, 10.0f};
  for (const float prescale : prescale_values) {
    for (size_t n = 1; n <= 120; n += 23) {
      VUnaryMicrokernelTester().batch_size(n).prescale(prescale).Test(
        xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x24,
        xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
    }
  }
}
11471 
// Sweeps the tester's alpha over {0.3, 3.0} for batch sizes 1, 24, 47, 70, 93, 116.
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X24, alpha) {
  const std::vector<float> alpha_values{0.3f, 3.0f};
  for (const float alpha : alpha_values) {
    for (size_t n = 1; n <= 120; n += 23) {
      VUnaryMicrokernelTester().batch_size(n).alpha(alpha).Test(
        xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x24,
        xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
    }
  }
}
11482 
// Sweeps the tester's beta over {0.3, 3.0} for batch sizes 1, 24, 47, 70, 93, 116.
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X24, beta) {
  const std::vector<float> beta_values{0.3f, 3.0f};
  for (const float beta : beta_values) {
    for (size_t n = 1; n <= 120; n += 23) {
      VUnaryMicrokernelTester().batch_size(n).beta(beta).Test(
        xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x24,
        xnn_init_f32_elu_wasmsimd_rr2_lut16_p3_params);
    }
  }
}
11493 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
11494 
11495 
11496 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Runs the x4 kernel once on a batch of exactly 4 elements.
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X4, batch_eq_4) {
  constexpr size_t kBatch = 4;
  VUnaryMicrokernelTester().batch_size(kBatch).Test(
    xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x4,
    xnn_init_f32_elu_wasmsimd_rr2_p6_params);
}
11502 
// Batch sizes that are multiples of 4: 8, 12, ..., 36.
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X4, batch_div_4) {
  for (size_t n = 8; n < 40; n += 4) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x4,
      xnn_init_f32_elu_wasmsimd_rr2_p6_params);
  }
}
11510 
// Every batch size below 4: 1 through 3.
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X4, batch_lt_4) {
  for (size_t n = 1; n < 4; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x4,
      xnn_init_f32_elu_wasmsimd_rr2_p6_params);
  }
}
11518 
// Batch sizes strictly between 4 and 8: 5 through 7.
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X4, batch_gt_4) {
  for (size_t n = 5; n < 8; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x4,
      xnn_init_f32_elu_wasmsimd_rr2_p6_params);
  }
}
11526 
// Enables the tester's in-place mode for batch sizes 1, 4, 7, 10, 13, 16, 19.
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X4, inplace) {
  for (size_t n = 1; n <= 20; n += 3) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
      xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x4,
      xnn_init_f32_elu_wasmsimd_rr2_p6_params);
  }
}
11535 
// Sweeps the tester's prescale over {0.1, 10.0} for batch sizes 1, 4, 7, 10, 13, 16, 19.
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X4, prescale) {
  const std::vector<float> prescale_values{0.1f, 10.0f};
  for (const float prescale : prescale_values) {
    for (size_t n = 1; n <= 20; n += 3) {
      VUnaryMicrokernelTester().batch_size(n).prescale(prescale).Test(
        xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x4,
        xnn_init_f32_elu_wasmsimd_rr2_p6_params);
    }
  }
}
11546 
// Sweeps the tester's alpha over {0.3, 3.0} for batch sizes 1, 4, 7, 10, 13, 16, 19.
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X4, alpha) {
  const std::vector<float> alpha_values{0.3f, 3.0f};
  for (const float alpha : alpha_values) {
    for (size_t n = 1; n <= 20; n += 3) {
      VUnaryMicrokernelTester().batch_size(n).alpha(alpha).Test(
        xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x4,
        xnn_init_f32_elu_wasmsimd_rr2_p6_params);
    }
  }
}
11557 
// Sweeps the tester's beta over {0.3, 3.0} for batch sizes 1, 4, 7, 10, 13, 16, 19.
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X4, beta) {
  const std::vector<float> beta_values{0.3f, 3.0f};
  for (const float beta : beta_values) {
    for (size_t n = 1; n <= 20; n += 3) {
      VUnaryMicrokernelTester().batch_size(n).beta(beta).Test(
        xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x4,
        xnn_init_f32_elu_wasmsimd_rr2_p6_params);
    }
  }
}
11568 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
11569 
11570 
11571 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Runs the x8 kernel once on a batch of exactly 8 elements.
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X8, batch_eq_8) {
  constexpr size_t kBatch = 8;
  VUnaryMicrokernelTester().batch_size(kBatch).Test(
    xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x8,
    xnn_init_f32_elu_wasmsimd_rr2_p6_params);
}
11577 
// Batch sizes that are multiples of 8: 16, 24, ..., 72.
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X8, batch_div_8) {
  for (size_t n = 16; n < 80; n += 8) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x8,
      xnn_init_f32_elu_wasmsimd_rr2_p6_params);
  }
}
11585 
// Every batch size below 8: 1 through 7.
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X8, batch_lt_8) {
  for (size_t n = 1; n < 8; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x8,
      xnn_init_f32_elu_wasmsimd_rr2_p6_params);
  }
}
11593 
// Batch sizes strictly between 8 and 16: 9 through 15.
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X8, batch_gt_8) {
  for (size_t n = 9; n < 16; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x8,
      xnn_init_f32_elu_wasmsimd_rr2_p6_params);
  }
}
11601 
// Enables the tester's in-place mode for batch sizes 1, 8, 15, 22, 29, 36.
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X8, inplace) {
  for (size_t n = 1; n <= 40; n += 7) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
      xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x8,
      xnn_init_f32_elu_wasmsimd_rr2_p6_params);
  }
}
11610 
// Sweeps the tester's prescale over {0.1, 10.0} for batch sizes 1, 8, 15, 22, 29, 36.
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X8, prescale) {
  const std::vector<float> prescale_values{0.1f, 10.0f};
  for (const float prescale : prescale_values) {
    for (size_t n = 1; n <= 40; n += 7) {
      VUnaryMicrokernelTester().batch_size(n).prescale(prescale).Test(
        xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x8,
        xnn_init_f32_elu_wasmsimd_rr2_p6_params);
    }
  }
}
11621 
// Sweeps the tester's alpha over {0.3, 3.0} for batch sizes 1, 8, 15, 22, 29, 36.
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X8, alpha) {
  const std::vector<float> alpha_values{0.3f, 3.0f};
  for (const float alpha : alpha_values) {
    for (size_t n = 1; n <= 40; n += 7) {
      VUnaryMicrokernelTester().batch_size(n).alpha(alpha).Test(
        xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x8,
        xnn_init_f32_elu_wasmsimd_rr2_p6_params);
    }
  }
}
11632 
// Sweeps the tester's beta over {0.3, 3.0} for batch sizes 1, 8, 15, 22, 29, 36.
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X8, beta) {
  const std::vector<float> beta_values{0.3f, 3.0f};
  for (const float beta : beta_values) {
    for (size_t n = 1; n <= 40; n += 7) {
      VUnaryMicrokernelTester().batch_size(n).beta(beta).Test(
        xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x8,
        xnn_init_f32_elu_wasmsimd_rr2_p6_params);
    }
  }
}
11643 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
11644 
11645 
11646 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Runs the x12 kernel once on a batch of exactly 12 elements.
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X12, batch_eq_12) {
  constexpr size_t kBatch = 12;
  VUnaryMicrokernelTester().batch_size(kBatch).Test(
    xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x12,
    xnn_init_f32_elu_wasmsimd_rr2_p6_params);
}
11652 
// Batch sizes that are multiples of 12: 24, 36, ..., 108.
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X12, batch_div_12) {
  for (size_t n = 24; n < 120; n += 12) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x12,
      xnn_init_f32_elu_wasmsimd_rr2_p6_params);
  }
}
11660 
// Every batch size below 12: 1 through 11.
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X12, batch_lt_12) {
  for (size_t n = 1; n < 12; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x12,
      xnn_init_f32_elu_wasmsimd_rr2_p6_params);
  }
}
11668 
// Batch sizes strictly between 12 and 24: 13 through 23.
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X12, batch_gt_12) {
  for (size_t n = 13; n < 24; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x12,
      xnn_init_f32_elu_wasmsimd_rr2_p6_params);
  }
}
11676 
// Enables the tester's in-place mode for batch sizes 1, 12, 23, 34, 45, 56.
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X12, inplace) {
  for (size_t n = 1; n <= 60; n += 11) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
      xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x12,
      xnn_init_f32_elu_wasmsimd_rr2_p6_params);
  }
}
11685 
// Sweeps the tester's prescale over {0.1, 10.0} for batch sizes 1, 12, 23, 34, 45, 56.
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X12, prescale) {
  const std::vector<float> prescale_values{0.1f, 10.0f};
  for (const float prescale : prescale_values) {
    for (size_t n = 1; n <= 60; n += 11) {
      VUnaryMicrokernelTester().batch_size(n).prescale(prescale).Test(
        xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x12,
        xnn_init_f32_elu_wasmsimd_rr2_p6_params);
    }
  }
}
11696 
// Sweeps the tester's alpha over {0.3, 3.0} for batch sizes 1, 12, 23, 34, 45, 56.
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X12, alpha) {
  const std::vector<float> alpha_values{0.3f, 3.0f};
  for (const float alpha : alpha_values) {
    for (size_t n = 1; n <= 60; n += 11) {
      VUnaryMicrokernelTester().batch_size(n).alpha(alpha).Test(
        xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x12,
        xnn_init_f32_elu_wasmsimd_rr2_p6_params);
    }
  }
}
11707 
// Sweeps the tester's beta over {0.3, 3.0} for batch sizes 1, 12, 23, 34, 45, 56.
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X12, beta) {
  const std::vector<float> beta_values{0.3f, 3.0f};
  for (const float beta : beta_values) {
    for (size_t n = 1; n <= 60; n += 11) {
      VUnaryMicrokernelTester().batch_size(n).beta(beta).Test(
        xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x12,
        xnn_init_f32_elu_wasmsimd_rr2_p6_params);
    }
  }
}
11718 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
11719 
11720 
11721 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Runs the x16 kernel once on a batch of exactly 16 elements.
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X16, batch_eq_16) {
  constexpr size_t kBatch = 16;
  VUnaryMicrokernelTester().batch_size(kBatch).Test(
    xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x16,
    xnn_init_f32_elu_wasmsimd_rr2_p6_params);
}
11727 
// Batch sizes that are multiples of 16: 32, 48, ..., 144.
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X16, batch_div_16) {
  for (size_t n = 32; n < 160; n += 16) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x16,
      xnn_init_f32_elu_wasmsimd_rr2_p6_params);
  }
}
11735 
// Every batch size below 16: 1 through 15.
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X16, batch_lt_16) {
  for (size_t n = 1; n < 16; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x16,
      xnn_init_f32_elu_wasmsimd_rr2_p6_params);
  }
}
11743 
// Batch sizes strictly between 16 and 32: 17 through 31.
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X16, batch_gt_16) {
  for (size_t n = 17; n < 32; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x16,
      xnn_init_f32_elu_wasmsimd_rr2_p6_params);
  }
}
11751 
// Enables the tester's in-place mode for batch sizes 1, 16, 31, 46, 61, 76.
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X16, inplace) {
  for (size_t n = 1; n <= 80; n += 15) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
      xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x16,
      xnn_init_f32_elu_wasmsimd_rr2_p6_params);
  }
}
11760 
// Sweeps the tester's prescale over {0.1, 10.0} for batch sizes 1, 16, 31, 46, 61, 76.
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X16, prescale) {
  const std::vector<float> prescale_values{0.1f, 10.0f};
  for (const float prescale : prescale_values) {
    for (size_t n = 1; n <= 80; n += 15) {
      VUnaryMicrokernelTester().batch_size(n).prescale(prescale).Test(
        xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x16,
        xnn_init_f32_elu_wasmsimd_rr2_p6_params);
    }
  }
}
11771 
// Sweeps the tester's alpha over {0.3, 3.0} for batch sizes 1, 16, 31, 46, 61, 76.
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X16, alpha) {
  const std::vector<float> alpha_values{0.3f, 3.0f};
  for (const float alpha : alpha_values) {
    for (size_t n = 1; n <= 80; n += 15) {
      VUnaryMicrokernelTester().batch_size(n).alpha(alpha).Test(
        xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x16,
        xnn_init_f32_elu_wasmsimd_rr2_p6_params);
    }
  }
}
11782 
// Sweeps the tester's beta over {0.3, 3.0} for batch sizes 1, 16, 31, 46, 61, 76.
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X16, beta) {
  const std::vector<float> beta_values{0.3f, 3.0f};
  for (const float beta : beta_values) {
    for (size_t n = 1; n <= 80; n += 15) {
      VUnaryMicrokernelTester().batch_size(n).beta(beta).Test(
        xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x16,
        xnn_init_f32_elu_wasmsimd_rr2_p6_params);
    }
  }
}
11793 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
11794 
11795 
11796 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Runs the x20 kernel once on a batch of exactly 20 elements.
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X20, batch_eq_20) {
  constexpr size_t kBatch = 20;
  VUnaryMicrokernelTester().batch_size(kBatch).Test(
    xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x20,
    xnn_init_f32_elu_wasmsimd_rr2_p6_params);
}
11802 
// Batch sizes that are multiples of 20: 40, 60, ..., 180.
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X20, batch_div_20) {
  for (size_t n = 40; n < 200; n += 20) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x20,
      xnn_init_f32_elu_wasmsimd_rr2_p6_params);
  }
}
11810 
// Every batch size below 20: 1 through 19.
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X20, batch_lt_20) {
  for (size_t n = 1; n < 20; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x20,
      xnn_init_f32_elu_wasmsimd_rr2_p6_params);
  }
}
11818 
// Batch sizes strictly between 20 and 40: 21 through 39.
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X20, batch_gt_20) {
  for (size_t n = 21; n < 40; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x20,
      xnn_init_f32_elu_wasmsimd_rr2_p6_params);
  }
}
11826 
// Enables the tester's in-place mode for batch sizes 1, 20, 39, 58, 77, 96.
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X20, inplace) {
  for (size_t n = 1; n <= 100; n += 19) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
      xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x20,
      xnn_init_f32_elu_wasmsimd_rr2_p6_params);
  }
}
11835 
// Sweeps the tester's prescale over {0.1, 10.0} for batch sizes 1, 20, 39, 58, 77, 96.
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X20, prescale) {
  const std::vector<float> prescale_values{0.1f, 10.0f};
  for (const float prescale : prescale_values) {
    for (size_t n = 1; n <= 100; n += 19) {
      VUnaryMicrokernelTester().batch_size(n).prescale(prescale).Test(
        xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x20,
        xnn_init_f32_elu_wasmsimd_rr2_p6_params);
    }
  }
}
11846 
// Sweeps the tester's alpha over {0.3, 3.0} for batch sizes 1, 20, 39, 58, 77, 96.
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X20, alpha) {
  const std::vector<float> alpha_values{0.3f, 3.0f};
  for (const float alpha : alpha_values) {
    for (size_t n = 1; n <= 100; n += 19) {
      VUnaryMicrokernelTester().batch_size(n).alpha(alpha).Test(
        xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x20,
        xnn_init_f32_elu_wasmsimd_rr2_p6_params);
    }
  }
}
11857 
// Sweeps the tester's beta over {0.3, 3.0} for batch sizes 1, 20, 39, 58, 77, 96.
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X20, beta) {
  const std::vector<float> beta_values{0.3f, 3.0f};
  for (const float beta : beta_values) {
    for (size_t n = 1; n <= 100; n += 19) {
      VUnaryMicrokernelTester().batch_size(n).beta(beta).Test(
        xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x20,
        xnn_init_f32_elu_wasmsimd_rr2_p6_params);
    }
  }
}
11868 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
11869 
11870 
11871 #if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Runs the x24 kernel once on a batch of exactly 24 elements.
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X24, batch_eq_24) {
  constexpr size_t kBatch = 24;
  VUnaryMicrokernelTester().batch_size(kBatch).Test(
    xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x24,
    xnn_init_f32_elu_wasmsimd_rr2_p6_params);
}
11877 
// Batch sizes that are multiples of 24: 48, 72, ..., 216.
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X24, batch_div_24) {
  for (size_t n = 48; n < 240; n += 24) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x24,
      xnn_init_f32_elu_wasmsimd_rr2_p6_params);
  }
}
11885 
// Every batch size below 24: 1 through 23.
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X24, batch_lt_24) {
  for (size_t n = 1; n < 24; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x24,
      xnn_init_f32_elu_wasmsimd_rr2_p6_params);
  }
}
11893 
// Batch sizes strictly between 24 and 48: 25 through 47.
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X24, batch_gt_24) {
  for (size_t n = 25; n < 48; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x24,
      xnn_init_f32_elu_wasmsimd_rr2_p6_params);
  }
}
11901 
// Enables the tester's in-place mode for batch sizes 1, 24, 47, 70, 93, 116.
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X24, inplace) {
  for (size_t n = 1; n <= 120; n += 23) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
      xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x24,
      xnn_init_f32_elu_wasmsimd_rr2_p6_params);
  }
}
11910 
// Sweeps the tester's prescale over {0.1, 10.0} for batch sizes 1, 24, 47, 70, 93, 116.
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X24, prescale) {
  const std::vector<float> prescale_values{0.1f, 10.0f};
  for (const float prescale : prescale_values) {
    for (size_t n = 1; n <= 120; n += 23) {
      VUnaryMicrokernelTester().batch_size(n).prescale(prescale).Test(
        xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x24,
        xnn_init_f32_elu_wasmsimd_rr2_p6_params);
    }
  }
}
11921 
// Sweeps the tester's alpha over {0.3, 3.0} for batch sizes 1, 24, 47, 70, 93, 116.
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X24, alpha) {
  const std::vector<float> alpha_values{0.3f, 3.0f};
  for (const float alpha : alpha_values) {
    for (size_t n = 1; n <= 120; n += 23) {
      VUnaryMicrokernelTester().batch_size(n).alpha(alpha).Test(
        xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x24,
        xnn_init_f32_elu_wasmsimd_rr2_p6_params);
    }
  }
}
11932 
// Sweeps the tester's beta over {0.3, 3.0} for batch sizes 1, 24, 47, 70, 93, 116.
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X24, beta) {
  const std::vector<float> beta_values{0.3f, 3.0f};
  for (const float beta : beta_values) {
    for (size_t n = 1; n <= 120; n += 23) {
      VUnaryMicrokernelTester().batch_size(n).beta(beta).Test(
        xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x24,
        xnn_init_f32_elu_wasmsimd_rr2_p6_params);
    }
  }
}
11943 #endif  // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
11944 
11945 
11946 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Runs the x1 kernel once on a batch of exactly 1 element.
TEST(F32_VELU__WASM_RR2_LUT16_P3_X1, batch_eq_1) {
  constexpr size_t kBatch = 1;
  VUnaryMicrokernelTester().batch_size(kBatch).Test(
    xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x1,
    xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
}
11952 
// Batch sizes above 1: 2 through 9.
TEST(F32_VELU__WASM_RR2_LUT16_P3_X1, batch_gt_1) {
  for (size_t n = 2; n < 10; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x1,
      xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
  }
}
11960 
// Enables the tester's in-place mode for batch sizes 1 through 5.
TEST(F32_VELU__WASM_RR2_LUT16_P3_X1, inplace) {
  for (size_t n = 1; n <= 5; n += 1) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
      xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x1,
      xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
  }
}
11969 
// Sweeps the tester's prescale over {0.1, 10.0} for batch sizes 1 through 5.
TEST(F32_VELU__WASM_RR2_LUT16_P3_X1, prescale) {
  const std::vector<float> prescale_values{0.1f, 10.0f};
  for (const float prescale : prescale_values) {
    for (size_t n = 1; n <= 5; n += 1) {
      VUnaryMicrokernelTester().batch_size(n).prescale(prescale).Test(
        xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x1,
        xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
    }
  }
}
11980 
// Sweeps the tester's alpha over {0.3, 3.0} for batch sizes 1 through 5.
TEST(F32_VELU__WASM_RR2_LUT16_P3_X1, alpha) {
  const std::vector<float> alpha_values{0.3f, 3.0f};
  for (const float alpha : alpha_values) {
    for (size_t n = 1; n <= 5; n += 1) {
      VUnaryMicrokernelTester().batch_size(n).alpha(alpha).Test(
        xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x1,
        xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
    }
  }
}
11991 
// Sweeps the tester's beta over {0.3, 3.0} for batch sizes 1 through 5.
TEST(F32_VELU__WASM_RR2_LUT16_P3_X1, beta) {
  const std::vector<float> beta_values{0.3f, 3.0f};
  for (const float beta : beta_values) {
    for (size_t n = 1; n <= 5; n += 1) {
      VUnaryMicrokernelTester().batch_size(n).beta(beta).Test(
        xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x1,
        xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
    }
  }
}
12002 #endif  // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
12003 
12004 
12005 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Runs the x2 kernel once on a batch of exactly 2 elements.
TEST(F32_VELU__WASM_RR2_LUT16_P3_X2, batch_eq_2) {
  constexpr size_t kBatch = 2;
  VUnaryMicrokernelTester().batch_size(kBatch).Test(
    xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x2,
    xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
}
12011 
// Batch sizes that are multiples of 2: 4, 6, ..., 18.
TEST(F32_VELU__WASM_RR2_LUT16_P3_X2, batch_div_2) {
  for (size_t n = 4; n < 20; n += 2) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x2,
      xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
  }
}
12019 
// Batch sizes below 2; the loop bounds admit only batch size 1.
TEST(F32_VELU__WASM_RR2_LUT16_P3_X2, batch_lt_2) {
  for (size_t n = 1; n < 2; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
      xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x2,
      xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
  }
}
12027 
// Batch sizes above 2 and below 4 (only batch_size 3 satisfies the loop bounds).
TEST(F32_VELU__WASM_RR2_LUT16_P3_X2, batch_gt_2) {
  for (size_t n = 3; n < 4; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x2,
        xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
  }
}
12035 
// Batch sizes 1 through 10 with the tester's inplace option set to true.
TEST(F32_VELU__WASM_RR2_LUT16_P3_X2, inplace) {
  for (size_t n = 1; n <= 10; ++n) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x2,
        xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
  }
}
12044 
// Runs the x2 LUT16 kernel with prescale set to 0.1 and 10.0, for batch sizes 1 through 10.
TEST(F32_VELU__WASM_RR2_LUT16_P3_X2, prescale) {
  const float kPrescales[] = {0.1f, 10.0f};
  for (const float p : kPrescales) {
    for (size_t n = 1; n <= 10; ++n) {
      VUnaryMicrokernelTester().batch_size(n).prescale(p).Test(
          xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x2,
          xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
    }
  }
}
12055 
// Runs the x2 LUT16 kernel with alpha set to 0.3 and 3.0, for batch sizes 1 through 10.
TEST(F32_VELU__WASM_RR2_LUT16_P3_X2, alpha) {
  const float kAlphas[] = {0.3f, 3.0f};
  for (const float a : kAlphas) {
    for (size_t n = 1; n <= 10; ++n) {
      VUnaryMicrokernelTester().batch_size(n).alpha(a).Test(
          xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x2,
          xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
    }
  }
}
12066 
// Runs the x2 LUT16 kernel with beta set to 0.3 and 3.0, for batch sizes 1 through 10.
TEST(F32_VELU__WASM_RR2_LUT16_P3_X2, beta) {
  const float kBetas[] = {0.3f, 3.0f};
  for (const float b : kBetas) {
    for (size_t n = 1; n <= 10; ++n) {
      VUnaryMicrokernelTester().batch_size(n).beta(b).Test(
          xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x2,
          xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
    }
  }
}
12077 #endif  // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
12078 
12079 
12080 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run of the x3 LUT16 kernel with batch_size 3.
TEST(F32_VELU__WASM_RR2_LUT16_P3_X3, batch_eq_3) {
  const size_t kBatch = 3;
  VUnaryMicrokernelTester().batch_size(kBatch).Test(
      xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x3,
      xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
}
12086 
// Batch sizes that are multiples of 3, from 6 up to (but excluding) 30.
TEST(F32_VELU__WASM_RR2_LUT16_P3_X3, batch_div_3) {
  for (size_t n = 6; n < 30; n += 3) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x3,
        xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
  }
}
12094 
// Batch sizes below 3 (1 and 2).
TEST(F32_VELU__WASM_RR2_LUT16_P3_X3, batch_lt_3) {
  for (size_t n = 1; n < 3; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x3,
        xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
  }
}
12102 
// Batch sizes above 3 and below 6 (4 and 5).
TEST(F32_VELU__WASM_RR2_LUT16_P3_X3, batch_gt_3) {
  for (size_t n = 4; n < 6; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x3,
        xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
  }
}
12110 
// Odd batch sizes 1, 3, ..., 15 with the tester's inplace option set to true.
TEST(F32_VELU__WASM_RR2_LUT16_P3_X3, inplace) {
  for (size_t n = 1; n <= 15; n += 2) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x3,
        xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
  }
}
12119 
// Runs the x3 LUT16 kernel with prescale set to 0.1 and 10.0, for batch sizes 1, 3, ..., 15.
TEST(F32_VELU__WASM_RR2_LUT16_P3_X3, prescale) {
  const float kPrescales[] = {0.1f, 10.0f};
  for (const float p : kPrescales) {
    for (size_t n = 1; n <= 15; n += 2) {
      VUnaryMicrokernelTester().batch_size(n).prescale(p).Test(
          xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x3,
          xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
    }
  }
}
12130 
// Runs the x3 LUT16 kernel with alpha set to 0.3 and 3.0, for batch sizes 1, 3, ..., 15.
TEST(F32_VELU__WASM_RR2_LUT16_P3_X3, alpha) {
  const float kAlphas[] = {0.3f, 3.0f};
  for (const float a : kAlphas) {
    for (size_t n = 1; n <= 15; n += 2) {
      VUnaryMicrokernelTester().batch_size(n).alpha(a).Test(
          xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x3,
          xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
    }
  }
}
12141 
// Runs the x3 LUT16 kernel with beta set to 0.3 and 3.0, for batch sizes 1, 3, ..., 15.
TEST(F32_VELU__WASM_RR2_LUT16_P3_X3, beta) {
  const float kBetas[] = {0.3f, 3.0f};
  for (const float b : kBetas) {
    for (size_t n = 1; n <= 15; n += 2) {
      VUnaryMicrokernelTester().batch_size(n).beta(b).Test(
          xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x3,
          xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
    }
  }
}
12152 #endif  // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
12153 
12154 
12155 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run of the x4 LUT16 kernel with batch_size 4.
TEST(F32_VELU__WASM_RR2_LUT16_P3_X4, batch_eq_4) {
  const size_t kBatch = 4;
  VUnaryMicrokernelTester().batch_size(kBatch).Test(
      xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x4,
      xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
}
12161 
// Batch sizes that are multiples of 4, from 8 up to (but excluding) 40.
TEST(F32_VELU__WASM_RR2_LUT16_P3_X4, batch_div_4) {
  for (size_t n = 8; n < 40; n += 4) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x4,
        xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
  }
}
12169 
// Batch sizes below 4 (1 through 3).
TEST(F32_VELU__WASM_RR2_LUT16_P3_X4, batch_lt_4) {
  for (size_t n = 1; n < 4; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x4,
        xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
  }
}
12177 
// Batch sizes above 4 and below 8 (5 through 7).
TEST(F32_VELU__WASM_RR2_LUT16_P3_X4, batch_gt_4) {
  for (size_t n = 5; n < 8; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x4,
        xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
  }
}
12185 
// Batch sizes 1, 4, 7, ..., 19 with the tester's inplace option set to true.
TEST(F32_VELU__WASM_RR2_LUT16_P3_X4, inplace) {
  for (size_t n = 1; n <= 20; n += 3) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x4,
        xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
  }
}
12194 
// Runs the x4 LUT16 kernel with prescale set to 0.1 and 10.0, for batch sizes 1, 4, ..., 19.
TEST(F32_VELU__WASM_RR2_LUT16_P3_X4, prescale) {
  const float kPrescales[] = {0.1f, 10.0f};
  for (const float p : kPrescales) {
    for (size_t n = 1; n <= 20; n += 3) {
      VUnaryMicrokernelTester().batch_size(n).prescale(p).Test(
          xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x4,
          xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
    }
  }
}
12205 
// Runs the x4 LUT16 kernel with alpha set to 0.3 and 3.0, for batch sizes 1, 4, ..., 19.
TEST(F32_VELU__WASM_RR2_LUT16_P3_X4, alpha) {
  const float kAlphas[] = {0.3f, 3.0f};
  for (const float a : kAlphas) {
    for (size_t n = 1; n <= 20; n += 3) {
      VUnaryMicrokernelTester().batch_size(n).alpha(a).Test(
          xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x4,
          xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
    }
  }
}
12216 
// Runs the x4 LUT16 kernel with beta set to 0.3 and 3.0, for batch sizes 1, 4, ..., 19.
TEST(F32_VELU__WASM_RR2_LUT16_P3_X4, beta) {
  const float kBetas[] = {0.3f, 3.0f};
  for (const float b : kBetas) {
    for (size_t n = 1; n <= 20; n += 3) {
      VUnaryMicrokernelTester().batch_size(n).beta(b).Test(
          xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x4,
          xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
    }
  }
}
12227 #endif  // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
12228 
12229 
12230 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run of the x5 LUT16 kernel with batch_size 5.
TEST(F32_VELU__WASM_RR2_LUT16_P3_X5, batch_eq_5) {
  const size_t kBatch = 5;
  VUnaryMicrokernelTester().batch_size(kBatch).Test(
      xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x5,
      xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
}
12236 
// Batch sizes that are multiples of 5, from 10 up to (but excluding) 50.
TEST(F32_VELU__WASM_RR2_LUT16_P3_X5, batch_div_5) {
  for (size_t n = 10; n < 50; n += 5) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x5,
        xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
  }
}
12244 
// Batch sizes below 5 (1 through 4).
TEST(F32_VELU__WASM_RR2_LUT16_P3_X5, batch_lt_5) {
  for (size_t n = 1; n < 5; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x5,
        xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
  }
}
12252 
// Batch sizes above 5 and below 10 (6 through 9).
TEST(F32_VELU__WASM_RR2_LUT16_P3_X5, batch_gt_5) {
  for (size_t n = 6; n < 10; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x5,
        xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
  }
}
12260 
// Batch sizes 1, 5, 9, ..., 25 with the tester's inplace option set to true.
TEST(F32_VELU__WASM_RR2_LUT16_P3_X5, inplace) {
  for (size_t n = 1; n <= 25; n += 4) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x5,
        xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
  }
}
12269 
// Runs the x5 LUT16 kernel with prescale set to 0.1 and 10.0, for batch sizes 1, 5, ..., 25.
TEST(F32_VELU__WASM_RR2_LUT16_P3_X5, prescale) {
  const float kPrescales[] = {0.1f, 10.0f};
  for (const float p : kPrescales) {
    for (size_t n = 1; n <= 25; n += 4) {
      VUnaryMicrokernelTester().batch_size(n).prescale(p).Test(
          xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x5,
          xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
    }
  }
}
12280 
// Runs the x5 LUT16 kernel with alpha set to 0.3 and 3.0, for batch sizes 1, 5, ..., 25.
TEST(F32_VELU__WASM_RR2_LUT16_P3_X5, alpha) {
  const float kAlphas[] = {0.3f, 3.0f};
  for (const float a : kAlphas) {
    for (size_t n = 1; n <= 25; n += 4) {
      VUnaryMicrokernelTester().batch_size(n).alpha(a).Test(
          xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x5,
          xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
    }
  }
}
12291 
// Runs the x5 LUT16 kernel with beta set to 0.3 and 3.0, for batch sizes 1, 5, ..., 25.
TEST(F32_VELU__WASM_RR2_LUT16_P3_X5, beta) {
  const float kBetas[] = {0.3f, 3.0f};
  for (const float b : kBetas) {
    for (size_t n = 1; n <= 25; n += 4) {
      VUnaryMicrokernelTester().batch_size(n).beta(b).Test(
          xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x5,
          xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
    }
  }
}
12302 #endif  // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
12303 
12304 
12305 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run of the x6 LUT16 kernel with batch_size 6.
TEST(F32_VELU__WASM_RR2_LUT16_P3_X6, batch_eq_6) {
  const size_t kBatch = 6;
  VUnaryMicrokernelTester().batch_size(kBatch).Test(
      xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x6,
      xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
}
12311 
// Batch sizes that are multiples of 6, from 12 up to (but excluding) 60.
TEST(F32_VELU__WASM_RR2_LUT16_P3_X6, batch_div_6) {
  for (size_t n = 12; n < 60; n += 6) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x6,
        xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
  }
}
12319 
// Batch sizes below 6 (1 through 5).
TEST(F32_VELU__WASM_RR2_LUT16_P3_X6, batch_lt_6) {
  for (size_t n = 1; n < 6; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x6,
        xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
  }
}
12327 
// Batch sizes above 6 and below 12 (7 through 11).
TEST(F32_VELU__WASM_RR2_LUT16_P3_X6, batch_gt_6) {
  for (size_t n = 7; n < 12; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x6,
        xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
  }
}
12335 
// Batch sizes 1, 6, 11, ..., 26 with the tester's inplace option set to true.
TEST(F32_VELU__WASM_RR2_LUT16_P3_X6, inplace) {
  for (size_t n = 1; n <= 30; n += 5) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x6,
        xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
  }
}
12344 
// Runs the x6 LUT16 kernel with prescale set to 0.1 and 10.0, for batch sizes 1, 6, ..., 26.
TEST(F32_VELU__WASM_RR2_LUT16_P3_X6, prescale) {
  const float kPrescales[] = {0.1f, 10.0f};
  for (const float p : kPrescales) {
    for (size_t n = 1; n <= 30; n += 5) {
      VUnaryMicrokernelTester().batch_size(n).prescale(p).Test(
          xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x6,
          xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
    }
  }
}
12355 
// Runs the x6 LUT16 kernel with alpha set to 0.3 and 3.0, for batch sizes 1, 6, ..., 26.
TEST(F32_VELU__WASM_RR2_LUT16_P3_X6, alpha) {
  const float kAlphas[] = {0.3f, 3.0f};
  for (const float a : kAlphas) {
    for (size_t n = 1; n <= 30; n += 5) {
      VUnaryMicrokernelTester().batch_size(n).alpha(a).Test(
          xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x6,
          xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
    }
  }
}
12366 
// Runs the x6 LUT16 kernel with beta set to 0.3 and 3.0, for batch sizes 1, 6, ..., 26.
TEST(F32_VELU__WASM_RR2_LUT16_P3_X6, beta) {
  const float kBetas[] = {0.3f, 3.0f};
  for (const float b : kBetas) {
    for (size_t n = 1; n <= 30; n += 5) {
      VUnaryMicrokernelTester().batch_size(n).beta(b).Test(
          xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x6,
          xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
    }
  }
}
12377 #endif  // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
12378 
12379 
12380 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run of the x1 P6 kernel with batch_size 1.
TEST(F32_VELU__WASM_RR2_P6_X1, batch_eq_1) {
  const size_t kBatch = 1;
  VUnaryMicrokernelTester().batch_size(kBatch).Test(
      xnn_f32_velu_ukernel__wasm_rr2_p6_x1,
      xnn_init_f32_elu_scalar_rr2_p6_params);
}
12386 
// Batch sizes above 1 and below 10 (2 through 9).
TEST(F32_VELU__WASM_RR2_P6_X1, batch_gt_1) {
  for (size_t n = 2; n < 10; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__wasm_rr2_p6_x1,
        xnn_init_f32_elu_scalar_rr2_p6_params);
  }
}
12394 
// Batch sizes 1 through 5 with the tester's inplace option set to true.
TEST(F32_VELU__WASM_RR2_P6_X1, inplace) {
  for (size_t n = 1; n <= 5; ++n) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_velu_ukernel__wasm_rr2_p6_x1,
        xnn_init_f32_elu_scalar_rr2_p6_params);
  }
}
12403 
// Runs the x1 P6 kernel with prescale set to 0.1 and 10.0, for batch sizes 1 through 5.
TEST(F32_VELU__WASM_RR2_P6_X1, prescale) {
  const float kPrescales[] = {0.1f, 10.0f};
  for (const float p : kPrescales) {
    for (size_t n = 1; n <= 5; ++n) {
      VUnaryMicrokernelTester().batch_size(n).prescale(p).Test(
          xnn_f32_velu_ukernel__wasm_rr2_p6_x1,
          xnn_init_f32_elu_scalar_rr2_p6_params);
    }
  }
}
12414 
// Runs the x1 P6 kernel with alpha set to 0.3 and 3.0, for batch sizes 1 through 5.
TEST(F32_VELU__WASM_RR2_P6_X1, alpha) {
  const float kAlphas[] = {0.3f, 3.0f};
  for (const float a : kAlphas) {
    for (size_t n = 1; n <= 5; ++n) {
      VUnaryMicrokernelTester().batch_size(n).alpha(a).Test(
          xnn_f32_velu_ukernel__wasm_rr2_p6_x1,
          xnn_init_f32_elu_scalar_rr2_p6_params);
    }
  }
}
12425 
// Runs the x1 P6 kernel with beta set to 0.3 and 3.0, for batch sizes 1 through 5.
TEST(F32_VELU__WASM_RR2_P6_X1, beta) {
  const float kBetas[] = {0.3f, 3.0f};
  for (const float b : kBetas) {
    for (size_t n = 1; n <= 5; ++n) {
      VUnaryMicrokernelTester().batch_size(n).beta(b).Test(
          xnn_f32_velu_ukernel__wasm_rr2_p6_x1,
          xnn_init_f32_elu_scalar_rr2_p6_params);
    }
  }
}
12436 #endif  // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
12437 
12438 
12439 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run of the x2 P6 kernel with batch_size 2.
TEST(F32_VELU__WASM_RR2_P6_X2, batch_eq_2) {
  const size_t kBatch = 2;
  VUnaryMicrokernelTester().batch_size(kBatch).Test(
      xnn_f32_velu_ukernel__wasm_rr2_p6_x2,
      xnn_init_f32_elu_scalar_rr2_p6_params);
}
12445 
// Batch sizes that are multiples of 2, from 4 up to (but excluding) 20.
TEST(F32_VELU__WASM_RR2_P6_X2, batch_div_2) {
  for (size_t n = 4; n < 20; n += 2) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__wasm_rr2_p6_x2,
        xnn_init_f32_elu_scalar_rr2_p6_params);
  }
}
12453 
// Batch sizes below 2 (only batch_size 1 satisfies the loop bounds).
TEST(F32_VELU__WASM_RR2_P6_X2, batch_lt_2) {
  for (size_t n = 1; n < 2; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__wasm_rr2_p6_x2,
        xnn_init_f32_elu_scalar_rr2_p6_params);
  }
}
12461 
// Batch sizes above 2 and below 4 (only batch_size 3 satisfies the loop bounds).
TEST(F32_VELU__WASM_RR2_P6_X2, batch_gt_2) {
  for (size_t n = 3; n < 4; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__wasm_rr2_p6_x2,
        xnn_init_f32_elu_scalar_rr2_p6_params);
  }
}
12469 
// Batch sizes 1 through 10 with the tester's inplace option set to true.
TEST(F32_VELU__WASM_RR2_P6_X2, inplace) {
  for (size_t n = 1; n <= 10; ++n) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_velu_ukernel__wasm_rr2_p6_x2,
        xnn_init_f32_elu_scalar_rr2_p6_params);
  }
}
12478 
// Runs the x2 P6 kernel with prescale set to 0.1 and 10.0, for batch sizes 1 through 10.
TEST(F32_VELU__WASM_RR2_P6_X2, prescale) {
  const float kPrescales[] = {0.1f, 10.0f};
  for (const float p : kPrescales) {
    for (size_t n = 1; n <= 10; ++n) {
      VUnaryMicrokernelTester().batch_size(n).prescale(p).Test(
          xnn_f32_velu_ukernel__wasm_rr2_p6_x2,
          xnn_init_f32_elu_scalar_rr2_p6_params);
    }
  }
}
12489 
// Runs the x2 P6 kernel with alpha set to 0.3 and 3.0, for batch sizes 1 through 10.
TEST(F32_VELU__WASM_RR2_P6_X2, alpha) {
  const float kAlphas[] = {0.3f, 3.0f};
  for (const float a : kAlphas) {
    for (size_t n = 1; n <= 10; ++n) {
      VUnaryMicrokernelTester().batch_size(n).alpha(a).Test(
          xnn_f32_velu_ukernel__wasm_rr2_p6_x2,
          xnn_init_f32_elu_scalar_rr2_p6_params);
    }
  }
}
12500 
// Runs the x2 P6 kernel with beta set to 0.3 and 3.0, for batch sizes 1 through 10.
TEST(F32_VELU__WASM_RR2_P6_X2, beta) {
  const float kBetas[] = {0.3f, 3.0f};
  for (const float b : kBetas) {
    for (size_t n = 1; n <= 10; ++n) {
      VUnaryMicrokernelTester().batch_size(n).beta(b).Test(
          xnn_f32_velu_ukernel__wasm_rr2_p6_x2,
          xnn_init_f32_elu_scalar_rr2_p6_params);
    }
  }
}
12511 #endif  // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
12512 
12513 
12514 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run of the x3 P6 kernel with batch_size 3.
TEST(F32_VELU__WASM_RR2_P6_X3, batch_eq_3) {
  const size_t kBatch = 3;
  VUnaryMicrokernelTester().batch_size(kBatch).Test(
      xnn_f32_velu_ukernel__wasm_rr2_p6_x3,
      xnn_init_f32_elu_scalar_rr2_p6_params);
}
12520 
// Batch sizes that are multiples of 3, from 6 up to (but excluding) 30.
TEST(F32_VELU__WASM_RR2_P6_X3, batch_div_3) {
  for (size_t n = 6; n < 30; n += 3) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__wasm_rr2_p6_x3,
        xnn_init_f32_elu_scalar_rr2_p6_params);
  }
}
12528 
// Batch sizes below 3 (1 and 2).
TEST(F32_VELU__WASM_RR2_P6_X3, batch_lt_3) {
  for (size_t n = 1; n < 3; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__wasm_rr2_p6_x3,
        xnn_init_f32_elu_scalar_rr2_p6_params);
  }
}
12536 
// Batch sizes above 3 and below 6 (4 and 5).
TEST(F32_VELU__WASM_RR2_P6_X3, batch_gt_3) {
  for (size_t n = 4; n < 6; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__wasm_rr2_p6_x3,
        xnn_init_f32_elu_scalar_rr2_p6_params);
  }
}
12544 
// Odd batch sizes 1, 3, ..., 15 with the tester's inplace option set to true.
TEST(F32_VELU__WASM_RR2_P6_X3, inplace) {
  for (size_t n = 1; n <= 15; n += 2) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_velu_ukernel__wasm_rr2_p6_x3,
        xnn_init_f32_elu_scalar_rr2_p6_params);
  }
}
12553 
// Runs the x3 P6 kernel with prescale set to 0.1 and 10.0, for batch sizes 1, 3, ..., 15.
TEST(F32_VELU__WASM_RR2_P6_X3, prescale) {
  const float kPrescales[] = {0.1f, 10.0f};
  for (const float p : kPrescales) {
    for (size_t n = 1; n <= 15; n += 2) {
      VUnaryMicrokernelTester().batch_size(n).prescale(p).Test(
          xnn_f32_velu_ukernel__wasm_rr2_p6_x3,
          xnn_init_f32_elu_scalar_rr2_p6_params);
    }
  }
}
12564 
// Runs the x3 P6 kernel with alpha set to 0.3 and 3.0, for batch sizes 1, 3, ..., 15.
TEST(F32_VELU__WASM_RR2_P6_X3, alpha) {
  const float kAlphas[] = {0.3f, 3.0f};
  for (const float a : kAlphas) {
    for (size_t n = 1; n <= 15; n += 2) {
      VUnaryMicrokernelTester().batch_size(n).alpha(a).Test(
          xnn_f32_velu_ukernel__wasm_rr2_p6_x3,
          xnn_init_f32_elu_scalar_rr2_p6_params);
    }
  }
}
12575 
// Runs the x3 P6 kernel with beta set to 0.3 and 3.0, for batch sizes 1, 3, ..., 15.
TEST(F32_VELU__WASM_RR2_P6_X3, beta) {
  const float kBetas[] = {0.3f, 3.0f};
  for (const float b : kBetas) {
    for (size_t n = 1; n <= 15; n += 2) {
      VUnaryMicrokernelTester().batch_size(n).beta(b).Test(
          xnn_f32_velu_ukernel__wasm_rr2_p6_x3,
          xnn_init_f32_elu_scalar_rr2_p6_params);
    }
  }
}
12586 #endif  // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
12587 
12588 
12589 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run of the x4 P6 kernel with batch_size 4.
TEST(F32_VELU__WASM_RR2_P6_X4, batch_eq_4) {
  const size_t kBatch = 4;
  VUnaryMicrokernelTester().batch_size(kBatch).Test(
      xnn_f32_velu_ukernel__wasm_rr2_p6_x4,
      xnn_init_f32_elu_scalar_rr2_p6_params);
}
12595 
// Batch sizes that are multiples of 4, from 8 up to (but excluding) 40.
TEST(F32_VELU__WASM_RR2_P6_X4, batch_div_4) {
  for (size_t n = 8; n < 40; n += 4) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__wasm_rr2_p6_x4,
        xnn_init_f32_elu_scalar_rr2_p6_params);
  }
}
12603 
// Batch sizes below 4 (1 through 3).
TEST(F32_VELU__WASM_RR2_P6_X4, batch_lt_4) {
  for (size_t n = 1; n < 4; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__wasm_rr2_p6_x4,
        xnn_init_f32_elu_scalar_rr2_p6_params);
  }
}
12611 
// Batch sizes above 4 and below 8 (5 through 7).
TEST(F32_VELU__WASM_RR2_P6_X4, batch_gt_4) {
  for (size_t n = 5; n < 8; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__wasm_rr2_p6_x4,
        xnn_init_f32_elu_scalar_rr2_p6_params);
  }
}
12619 
// Batch sizes 1, 4, 7, ..., 19 with the tester's inplace option set to true.
TEST(F32_VELU__WASM_RR2_P6_X4, inplace) {
  for (size_t n = 1; n <= 20; n += 3) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_velu_ukernel__wasm_rr2_p6_x4,
        xnn_init_f32_elu_scalar_rr2_p6_params);
  }
}
12628 
// Runs the x4 P6 kernel with prescale set to 0.1 and 10.0, for batch sizes 1, 4, ..., 19.
TEST(F32_VELU__WASM_RR2_P6_X4, prescale) {
  const float kPrescales[] = {0.1f, 10.0f};
  for (const float p : kPrescales) {
    for (size_t n = 1; n <= 20; n += 3) {
      VUnaryMicrokernelTester().batch_size(n).prescale(p).Test(
          xnn_f32_velu_ukernel__wasm_rr2_p6_x4,
          xnn_init_f32_elu_scalar_rr2_p6_params);
    }
  }
}
12639 
// Runs the x4 P6 kernel with alpha set to 0.3 and 3.0, for batch sizes 1, 4, ..., 19.
TEST(F32_VELU__WASM_RR2_P6_X4, alpha) {
  const float kAlphas[] = {0.3f, 3.0f};
  for (const float a : kAlphas) {
    for (size_t n = 1; n <= 20; n += 3) {
      VUnaryMicrokernelTester().batch_size(n).alpha(a).Test(
          xnn_f32_velu_ukernel__wasm_rr2_p6_x4,
          xnn_init_f32_elu_scalar_rr2_p6_params);
    }
  }
}
12650 
// Runs the x4 P6 kernel with beta set to 0.3 and 3.0, for batch sizes 1, 4, ..., 19.
TEST(F32_VELU__WASM_RR2_P6_X4, beta) {
  const float kBetas[] = {0.3f, 3.0f};
  for (const float b : kBetas) {
    for (size_t n = 1; n <= 20; n += 3) {
      VUnaryMicrokernelTester().batch_size(n).beta(b).Test(
          xnn_f32_velu_ukernel__wasm_rr2_p6_x4,
          xnn_init_f32_elu_scalar_rr2_p6_params);
    }
  }
}
12661 #endif  // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
12662 
12663 
12664 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
// Single run of the x5 P6 kernel with batch_size 5.
TEST(F32_VELU__WASM_RR2_P6_X5, batch_eq_5) {
  const size_t kBatch = 5;
  VUnaryMicrokernelTester().batch_size(kBatch).Test(
      xnn_f32_velu_ukernel__wasm_rr2_p6_x5,
      xnn_init_f32_elu_scalar_rr2_p6_params);
}
12670 
// Batch sizes that are multiples of 5, from 10 up to (but excluding) 50.
TEST(F32_VELU__WASM_RR2_P6_X5, batch_div_5) {
  for (size_t n = 10; n < 50; n += 5) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__wasm_rr2_p6_x5,
        xnn_init_f32_elu_scalar_rr2_p6_params);
  }
}
12678 
// Batch sizes below 5 (1 through 4).
TEST(F32_VELU__WASM_RR2_P6_X5, batch_lt_5) {
  for (size_t n = 1; n < 5; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__wasm_rr2_p6_x5,
        xnn_init_f32_elu_scalar_rr2_p6_params);
  }
}
12686 
// Batch sizes above 5 and below 10 (6 through 9).
TEST(F32_VELU__WASM_RR2_P6_X5, batch_gt_5) {
  for (size_t n = 6; n < 10; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__wasm_rr2_p6_x5,
        xnn_init_f32_elu_scalar_rr2_p6_params);
  }
}
12694 
// Batch sizes 1, 5, 9, ..., 25 with the tester's inplace option set to true.
TEST(F32_VELU__WASM_RR2_P6_X5, inplace) {
  for (size_t n = 1; n <= 25; n += 4) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_velu_ukernel__wasm_rr2_p6_x5,
        xnn_init_f32_elu_scalar_rr2_p6_params);
  }
}
12703 
// Runs the x5 P6 kernel with prescale set to 0.1 and 10.0, for batch sizes 1, 5, ..., 25.
TEST(F32_VELU__WASM_RR2_P6_X5, prescale) {
  const float kPrescales[] = {0.1f, 10.0f};
  for (const float p : kPrescales) {
    for (size_t n = 1; n <= 25; n += 4) {
      VUnaryMicrokernelTester().batch_size(n).prescale(p).Test(
          xnn_f32_velu_ukernel__wasm_rr2_p6_x5,
          xnn_init_f32_elu_scalar_rr2_p6_params);
    }
  }
}
12714 
TEST(F32_VELU__WASM_RR2_P6_X5, alpha) {
  // Tries alpha 0.3 and 3.0, each over batch sizes 1, 5, ..., 25.
  const std::vector<float> alphas{0.3f, 3.0f};
  for (const float alpha_value : alphas) {
    for (size_t n = 1; n <= 25; n += 4) {
      VUnaryMicrokernelTester().batch_size(n).alpha(alpha_value).Test(
          xnn_f32_velu_ukernel__wasm_rr2_p6_x5,
          xnn_init_f32_elu_scalar_rr2_p6_params);
    }
  }
}
12725 
TEST(F32_VELU__WASM_RR2_P6_X5, beta) {
  // Tries beta 0.3 and 3.0, each over batch sizes 1, 5, ..., 25.
  const std::vector<float> betas{0.3f, 3.0f};
  for (const float beta_value : betas) {
    for (size_t n = 1; n <= 25; n += 4) {
      VUnaryMicrokernelTester().batch_size(n).beta(beta_value).Test(
          xnn_f32_velu_ukernel__wasm_rr2_p6_x5,
          xnn_init_f32_elu_scalar_rr2_p6_params);
    }
  }
}
12736 #endif  // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
12737 
12738 
12739 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
TEST(F32_VELU__WASM_RR2_P6_X6, batch_eq_6) {
  // Runs the kernel once with a batch of exactly 6 elements.
  VUnaryMicrokernelTester().batch_size(6).Test(
      xnn_f32_velu_ukernel__wasm_rr2_p6_x6,
      xnn_init_f32_elu_scalar_rr2_p6_params);
}
12745 
TEST(F32_VELU__WASM_RR2_P6_X6, batch_div_6) {
  // Covers batch sizes 12, 18, ..., 54 (multiples of 6 below 60).
  for (size_t n = 12; n < 60; n += 6) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__wasm_rr2_p6_x6,
        xnn_init_f32_elu_scalar_rr2_p6_params);
  }
}
12753 
TEST(F32_VELU__WASM_RR2_P6_X6, batch_lt_6) {
  // Covers batch sizes 1 through 5.
  for (size_t n = 1; n < 6; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__wasm_rr2_p6_x6,
        xnn_init_f32_elu_scalar_rr2_p6_params);
  }
}
12761 
TEST(F32_VELU__WASM_RR2_P6_X6, batch_gt_6) {
  // Covers batch sizes 7 through 11.
  for (size_t n = 7; n < 12; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__wasm_rr2_p6_x6,
        xnn_init_f32_elu_scalar_rr2_p6_params);
  }
}
12769 
TEST(F32_VELU__WASM_RR2_P6_X6, inplace) {
  // Batch sizes 1, 6, ..., 26 with inplace(true) set on the tester.
  for (size_t n = 1; n <= 30; n += 5) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_velu_ukernel__wasm_rr2_p6_x6,
        xnn_init_f32_elu_scalar_rr2_p6_params);
  }
}
12778 
TEST(F32_VELU__WASM_RR2_P6_X6, prescale) {
  // Tries prescale 0.1 and 10.0, each over batch sizes 1, 6, ..., 26.
  const std::vector<float> prescales{0.1f, 10.0f};
  for (const float prescale_value : prescales) {
    for (size_t n = 1; n <= 30; n += 5) {
      VUnaryMicrokernelTester().batch_size(n).prescale(prescale_value).Test(
          xnn_f32_velu_ukernel__wasm_rr2_p6_x6,
          xnn_init_f32_elu_scalar_rr2_p6_params);
    }
  }
}
12789 
TEST(F32_VELU__WASM_RR2_P6_X6, alpha) {
  // Tries alpha 0.3 and 3.0, each over batch sizes 1, 6, ..., 26.
  const std::vector<float> alphas{0.3f, 3.0f};
  for (const float alpha_value : alphas) {
    for (size_t n = 1; n <= 30; n += 5) {
      VUnaryMicrokernelTester().batch_size(n).alpha(alpha_value).Test(
          xnn_f32_velu_ukernel__wasm_rr2_p6_x6,
          xnn_init_f32_elu_scalar_rr2_p6_params);
    }
  }
}
12800 
TEST(F32_VELU__WASM_RR2_P6_X6, beta) {
  // Tries beta 0.3 and 3.0, each over batch sizes 1, 6, ..., 26.
  const std::vector<float> betas{0.3f, 3.0f};
  for (const float beta_value : betas) {
    for (size_t n = 1; n <= 30; n += 5) {
      VUnaryMicrokernelTester().batch_size(n).beta(beta_value).Test(
          xnn_f32_velu_ukernel__wasm_rr2_p6_x6,
          xnn_init_f32_elu_scalar_rr2_p6_params);
    }
  }
}
12811 #endif  // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
12812 
12813 
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X1, batch_eq_1) {
  // Runs the kernel once with a batch of exactly 1 element.
  VUnaryMicrokernelTester().batch_size(1).Test(
      xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x1,
      xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
}
12819 
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X1, batch_gt_1) {
  // Covers batch sizes 2 through 9.
  for (size_t n = 2; n < 10; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x1,
        xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
  }
}
12827 
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X1, inplace) {
  // Batch sizes 1 through 5 with inplace(true) set on the tester.
  for (size_t n = 1; n <= 5; ++n) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x1,
        xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
  }
}
12836 
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X1, prescale) {
  // Tries prescale 0.1 and 10.0, each over batch sizes 1 through 5.
  const std::vector<float> prescales{0.1f, 10.0f};
  for (const float prescale_value : prescales) {
    for (size_t n = 1; n <= 5; ++n) {
      VUnaryMicrokernelTester().batch_size(n).prescale(prescale_value).Test(
          xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x1,
          xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
    }
  }
}
12847 
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X1, alpha) {
  // Tries alpha 0.3 and 3.0, each over batch sizes 1 through 5.
  const std::vector<float> alphas{0.3f, 3.0f};
  for (const float alpha_value : alphas) {
    for (size_t n = 1; n <= 5; ++n) {
      VUnaryMicrokernelTester().batch_size(n).alpha(alpha_value).Test(
          xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x1,
          xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
    }
  }
}
12858 
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X1, beta) {
  // Tries beta 0.3 and 3.0, each over batch sizes 1 through 5.
  const std::vector<float> betas{0.3f, 3.0f};
  for (const float beta_value : betas) {
    for (size_t n = 1; n <= 5; ++n) {
      VUnaryMicrokernelTester().batch_size(n).beta(beta_value).Test(
          xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x1,
          xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
    }
  }
}
12869 
12870 
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X2, batch_eq_2) {
  // Runs the kernel once with a batch of exactly 2 elements.
  VUnaryMicrokernelTester().batch_size(2).Test(
      xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x2,
      xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
}
12876 
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X2, batch_div_2) {
  // Covers batch sizes 4, 6, ..., 18 (multiples of 2 below 20).
  for (size_t n = 4; n < 20; n += 2) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x2,
        xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
  }
}
12884 
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X2, batch_lt_2) {
  // The loop yields a single batch size, 1.
  for (size_t n = 1; n < 2; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x2,
        xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
  }
}
12892 
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X2, batch_gt_2) {
  // The loop yields a single batch size, 3.
  for (size_t n = 3; n < 4; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x2,
        xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
  }
}
12900 
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X2, inplace) {
  // Batch sizes 1 through 10 with inplace(true) set on the tester.
  for (size_t n = 1; n <= 10; ++n) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x2,
        xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
  }
}
12909 
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X2, prescale) {
  // Tries prescale 0.1 and 10.0, each over batch sizes 1 through 10.
  const std::vector<float> prescales{0.1f, 10.0f};
  for (const float prescale_value : prescales) {
    for (size_t n = 1; n <= 10; ++n) {
      VUnaryMicrokernelTester().batch_size(n).prescale(prescale_value).Test(
          xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x2,
          xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
    }
  }
}
12920 
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X2, alpha) {
  // Tries alpha 0.3 and 3.0, each over batch sizes 1 through 10.
  const std::vector<float> alphas{0.3f, 3.0f};
  for (const float alpha_value : alphas) {
    for (size_t n = 1; n <= 10; ++n) {
      VUnaryMicrokernelTester().batch_size(n).alpha(alpha_value).Test(
          xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x2,
          xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
    }
  }
}
12931 
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X2, beta) {
  // Tries beta 0.3 and 3.0, each over batch sizes 1 through 10.
  const std::vector<float> betas{0.3f, 3.0f};
  for (const float beta_value : betas) {
    for (size_t n = 1; n <= 10; ++n) {
      VUnaryMicrokernelTester().batch_size(n).beta(beta_value).Test(
          xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x2,
          xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
    }
  }
}
12942 
12943 
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X3, batch_eq_3) {
  // Runs the kernel once with a batch of exactly 3 elements.
  VUnaryMicrokernelTester().batch_size(3).Test(
      xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x3,
      xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
}
12949 
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X3, batch_div_3) {
  // Covers batch sizes 6, 9, ..., 27 (multiples of 3 below 30).
  for (size_t n = 6; n < 30; n += 3) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x3,
        xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
  }
}
12957 
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X3, batch_lt_3) {
  // Covers batch sizes 1 and 2.
  for (size_t n = 1; n < 3; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x3,
        xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
  }
}
12965 
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X3, batch_gt_3) {
  // Covers batch sizes 4 and 5.
  for (size_t n = 4; n < 6; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x3,
        xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
  }
}
12973 
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X3, inplace) {
  // Batch sizes 1, 3, ..., 15 with inplace(true) set on the tester.
  for (size_t n = 1; n <= 15; n += 2) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x3,
        xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
  }
}
12982 
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X3, prescale) {
  // Tries prescale 0.1 and 10.0, each over batch sizes 1, 3, ..., 15.
  const std::vector<float> prescales{0.1f, 10.0f};
  for (const float prescale_value : prescales) {
    for (size_t n = 1; n <= 15; n += 2) {
      VUnaryMicrokernelTester().batch_size(n).prescale(prescale_value).Test(
          xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x3,
          xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
    }
  }
}
12993 
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X3, alpha) {
  // Tries alpha 0.3 and 3.0, each over batch sizes 1, 3, ..., 15.
  const std::vector<float> alphas{0.3f, 3.0f};
  for (const float alpha_value : alphas) {
    for (size_t n = 1; n <= 15; n += 2) {
      VUnaryMicrokernelTester().batch_size(n).alpha(alpha_value).Test(
          xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x3,
          xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
    }
  }
}
13004 
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X3, beta) {
  // Tries beta 0.3 and 3.0, each over batch sizes 1, 3, ..., 15.
  const std::vector<float> betas{0.3f, 3.0f};
  for (const float beta_value : betas) {
    for (size_t n = 1; n <= 15; n += 2) {
      VUnaryMicrokernelTester().batch_size(n).beta(beta_value).Test(
          xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x3,
          xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
    }
  }
}
13015 
13016 
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X4, batch_eq_4) {
  // Runs the kernel once with a batch of exactly 4 elements.
  VUnaryMicrokernelTester().batch_size(4).Test(
      xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x4,
      xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
}
13022 
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X4, batch_div_4) {
  // Covers batch sizes 8, 12, ..., 36 (multiples of 4 below 40).
  for (size_t n = 8; n < 40; n += 4) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x4,
        xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
  }
}
13030 
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X4, batch_lt_4) {
  // Covers batch sizes 1 through 3.
  for (size_t n = 1; n < 4; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x4,
        xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
  }
}
13038 
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X4, batch_gt_4) {
  // Covers batch sizes 5 through 7.
  for (size_t n = 5; n < 8; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x4,
        xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
  }
}
13046 
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X4, inplace) {
  // Batch sizes 1, 4, ..., 19 with inplace(true) set on the tester.
  for (size_t n = 1; n <= 20; n += 3) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x4,
        xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
  }
}
13055 
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X4, prescale) {
  // Tries prescale 0.1 and 10.0, each over batch sizes 1, 4, ..., 19.
  const std::vector<float> prescales{0.1f, 10.0f};
  for (const float prescale_value : prescales) {
    for (size_t n = 1; n <= 20; n += 3) {
      VUnaryMicrokernelTester().batch_size(n).prescale(prescale_value).Test(
          xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x4,
          xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
    }
  }
}
13066 
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X4, alpha) {
  // Tries alpha 0.3 and 3.0, each over batch sizes 1, 4, ..., 19.
  const std::vector<float> alphas{0.3f, 3.0f};
  for (const float alpha_value : alphas) {
    for (size_t n = 1; n <= 20; n += 3) {
      VUnaryMicrokernelTester().batch_size(n).alpha(alpha_value).Test(
          xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x4,
          xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
    }
  }
}
13077 
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X4, beta) {
  // Tries beta 0.3 and 3.0, each over batch sizes 1, 4, ..., 19.
  const std::vector<float> betas{0.3f, 3.0f};
  for (const float beta_value : betas) {
    for (size_t n = 1; n <= 20; n += 3) {
      VUnaryMicrokernelTester().batch_size(n).beta(beta_value).Test(
          xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x4,
          xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
    }
  }
}
13088 
13089 
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X5, batch_eq_5) {
  // Runs the kernel once with a batch of exactly 5 elements.
  VUnaryMicrokernelTester().batch_size(5).Test(
      xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x5,
      xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
}
13095 
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X5, batch_div_5) {
  // Covers batch sizes 10, 15, ..., 45 (multiples of 5 below 50).
  for (size_t n = 10; n < 50; n += 5) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x5,
        xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
  }
}
13103 
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X5, batch_lt_5) {
  // Covers batch sizes 1 through 4.
  for (size_t n = 1; n < 5; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x5,
        xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
  }
}
13111 
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X5, batch_gt_5) {
  // Covers batch sizes 6 through 9.
  for (size_t n = 6; n < 10; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x5,
        xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
  }
}
13119 
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X5, inplace) {
  // Batch sizes 1, 5, ..., 25 with inplace(true) set on the tester.
  for (size_t n = 1; n <= 25; n += 4) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x5,
        xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
  }
}
13128 
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X5, prescale) {
  // Tries prescale 0.1 and 10.0, each over batch sizes 1, 5, ..., 25.
  const std::vector<float> prescales{0.1f, 10.0f};
  for (const float prescale_value : prescales) {
    for (size_t n = 1; n <= 25; n += 4) {
      VUnaryMicrokernelTester().batch_size(n).prescale(prescale_value).Test(
          xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x5,
          xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
    }
  }
}
13139 
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X5, alpha) {
  // Tries alpha 0.3 and 3.0, each over batch sizes 1, 5, ..., 25.
  const std::vector<float> alphas{0.3f, 3.0f};
  for (const float alpha_value : alphas) {
    for (size_t n = 1; n <= 25; n += 4) {
      VUnaryMicrokernelTester().batch_size(n).alpha(alpha_value).Test(
          xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x5,
          xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
    }
  }
}
13150 
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X5, beta) {
  // Tries beta 0.3 and 3.0, each over batch sizes 1, 5, ..., 25.
  const std::vector<float> betas{0.3f, 3.0f};
  for (const float beta_value : betas) {
    for (size_t n = 1; n <= 25; n += 4) {
      VUnaryMicrokernelTester().batch_size(n).beta(beta_value).Test(
          xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x5,
          xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
    }
  }
}
13161 
13162 
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X6, batch_eq_6) {
  // Runs the kernel once with a batch of exactly 6 elements.
  VUnaryMicrokernelTester().batch_size(6).Test(
      xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x6,
      xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
}
13168 
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X6, batch_div_6) {
  // Covers batch sizes 12, 18, ..., 54 (multiples of 6 below 60).
  for (size_t n = 12; n < 60; n += 6) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x6,
        xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
  }
}
13176 
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X6, batch_lt_6) {
  // Covers batch sizes 1 through 5.
  for (size_t n = 1; n < 6; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x6,
        xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
  }
}
13184 
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X6, batch_gt_6) {
  // Covers batch sizes 7 through 11.
  for (size_t n = 7; n < 12; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x6,
        xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
  }
}
13192 
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X6, inplace) {
  // Batch sizes 1, 6, ..., 26 with inplace(true) set on the tester.
  for (size_t n = 1; n <= 30; n += 5) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x6,
        xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
  }
}
13201 
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X6, prescale) {
  // Tries prescale 0.1 and 10.0, each over batch sizes 1, 6, ..., 26.
  const std::vector<float> prescales{0.1f, 10.0f};
  for (const float prescale_value : prescales) {
    for (size_t n = 1; n <= 30; n += 5) {
      VUnaryMicrokernelTester().batch_size(n).prescale(prescale_value).Test(
          xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x6,
          xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
    }
  }
}
13212 
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X6, alpha) {
  // Tries alpha 0.3 and 3.0, each over batch sizes 1, 6, ..., 26.
  const std::vector<float> alphas{0.3f, 3.0f};
  for (const float alpha_value : alphas) {
    for (size_t n = 1; n <= 30; n += 5) {
      VUnaryMicrokernelTester().batch_size(n).alpha(alpha_value).Test(
          xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x6,
          xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
    }
  }
}
13223 
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X6, beta) {
  // Tries beta 0.3 and 3.0, each over batch sizes 1, 6, ..., 26.
  const std::vector<float> betas{0.3f, 3.0f};
  for (const float beta_value : betas) {
    for (size_t n = 1; n <= 30; n += 5) {
      VUnaryMicrokernelTester().batch_size(n).beta(beta_value).Test(
          xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x6,
          xnn_init_f32_elu_scalar_rr2_lut16_p3_params);
    }
  }
}
13234 
13235 
TEST(F32_VELU__SCALAR_RR2_P6_X1, batch_eq_1) {
  // Runs the kernel once with a batch of exactly 1 element.
  VUnaryMicrokernelTester().batch_size(1).Test(
      xnn_f32_velu_ukernel__scalar_rr2_p6_x1,
      xnn_init_f32_elu_scalar_rr2_p6_params);
}
13241 
TEST(F32_VELU__SCALAR_RR2_P6_X1, batch_gt_1) {
  // Covers batch sizes 2 through 9.
  for (size_t n = 2; n < 10; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__scalar_rr2_p6_x1,
        xnn_init_f32_elu_scalar_rr2_p6_params);
  }
}
13249 
TEST(F32_VELU__SCALAR_RR2_P6_X1, inplace) {
  // Batch sizes 1 through 5 with inplace(true) set on the tester.
  for (size_t n = 1; n <= 5; ++n) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_velu_ukernel__scalar_rr2_p6_x1,
        xnn_init_f32_elu_scalar_rr2_p6_params);
  }
}
13258 
TEST(F32_VELU__SCALAR_RR2_P6_X1, prescale) {
  // Tries prescale 0.1 and 10.0, each over batch sizes 1 through 5.
  const std::vector<float> prescales{0.1f, 10.0f};
  for (const float prescale_value : prescales) {
    for (size_t n = 1; n <= 5; ++n) {
      VUnaryMicrokernelTester().batch_size(n).prescale(prescale_value).Test(
          xnn_f32_velu_ukernel__scalar_rr2_p6_x1,
          xnn_init_f32_elu_scalar_rr2_p6_params);
    }
  }
}
13269 
TEST(F32_VELU__SCALAR_RR2_P6_X1, alpha) {
  // Tries alpha 0.3 and 3.0, each over batch sizes 1 through 5.
  const std::vector<float> alphas{0.3f, 3.0f};
  for (const float alpha_value : alphas) {
    for (size_t n = 1; n <= 5; ++n) {
      VUnaryMicrokernelTester().batch_size(n).alpha(alpha_value).Test(
          xnn_f32_velu_ukernel__scalar_rr2_p6_x1,
          xnn_init_f32_elu_scalar_rr2_p6_params);
    }
  }
}
13280 
TEST(F32_VELU__SCALAR_RR2_P6_X1, beta) {
  // Tries beta 0.3 and 3.0, each over batch sizes 1 through 5.
  const std::vector<float> betas{0.3f, 3.0f};
  for (const float beta_value : betas) {
    for (size_t n = 1; n <= 5; ++n) {
      VUnaryMicrokernelTester().batch_size(n).beta(beta_value).Test(
          xnn_f32_velu_ukernel__scalar_rr2_p6_x1,
          xnn_init_f32_elu_scalar_rr2_p6_params);
    }
  }
}
13291 
13292 
TEST(F32_VELU__SCALAR_RR2_P6_X2, batch_eq_2) {
  // Runs the kernel once with a batch of exactly 2 elements.
  VUnaryMicrokernelTester().batch_size(2).Test(
      xnn_f32_velu_ukernel__scalar_rr2_p6_x2,
      xnn_init_f32_elu_scalar_rr2_p6_params);
}
13298 
TEST(F32_VELU__SCALAR_RR2_P6_X2, batch_div_2) {
  // Covers batch sizes 4, 6, ..., 18 (multiples of 2 below 20).
  for (size_t n = 4; n < 20; n += 2) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__scalar_rr2_p6_x2,
        xnn_init_f32_elu_scalar_rr2_p6_params);
  }
}
13306 
TEST(F32_VELU__SCALAR_RR2_P6_X2, batch_lt_2) {
  // The loop yields a single batch size, 1.
  for (size_t n = 1; n < 2; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__scalar_rr2_p6_x2,
        xnn_init_f32_elu_scalar_rr2_p6_params);
  }
}
13314 
TEST(F32_VELU__SCALAR_RR2_P6_X2, batch_gt_2) {
  // The loop yields a single batch size, 3.
  for (size_t n = 3; n < 4; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__scalar_rr2_p6_x2,
        xnn_init_f32_elu_scalar_rr2_p6_params);
  }
}
13322 
TEST(F32_VELU__SCALAR_RR2_P6_X2, inplace) {
  // Batch sizes 1 through 10 with inplace(true) set on the tester.
  for (size_t n = 1; n <= 10; ++n) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_velu_ukernel__scalar_rr2_p6_x2,
        xnn_init_f32_elu_scalar_rr2_p6_params);
  }
}
13331 
TEST(F32_VELU__SCALAR_RR2_P6_X2, prescale) {
  // Tries prescale 0.1 and 10.0, each over batch sizes 1 through 10.
  const std::vector<float> prescales{0.1f, 10.0f};
  for (const float prescale_value : prescales) {
    for (size_t n = 1; n <= 10; ++n) {
      VUnaryMicrokernelTester().batch_size(n).prescale(prescale_value).Test(
          xnn_f32_velu_ukernel__scalar_rr2_p6_x2,
          xnn_init_f32_elu_scalar_rr2_p6_params);
    }
  }
}
13342 
TEST(F32_VELU__SCALAR_RR2_P6_X2, alpha) {
  // Tries alpha 0.3 and 3.0, each over batch sizes 1 through 10.
  const std::vector<float> alphas{0.3f, 3.0f};
  for (const float alpha_value : alphas) {
    for (size_t n = 1; n <= 10; ++n) {
      VUnaryMicrokernelTester().batch_size(n).alpha(alpha_value).Test(
          xnn_f32_velu_ukernel__scalar_rr2_p6_x2,
          xnn_init_f32_elu_scalar_rr2_p6_params);
    }
  }
}
13353 
TEST(F32_VELU__SCALAR_RR2_P6_X2, beta) {
  // Tries beta 0.3 and 3.0, each over batch sizes 1 through 10.
  const std::vector<float> betas{0.3f, 3.0f};
  for (const float beta_value : betas) {
    for (size_t n = 1; n <= 10; ++n) {
      VUnaryMicrokernelTester().batch_size(n).beta(beta_value).Test(
          xnn_f32_velu_ukernel__scalar_rr2_p6_x2,
          xnn_init_f32_elu_scalar_rr2_p6_params);
    }
  }
}
13364 
13365 
TEST(F32_VELU__SCALAR_RR2_P6_X3, batch_eq_3) {
  // Runs the kernel once with a batch of exactly 3 elements.
  VUnaryMicrokernelTester().batch_size(3).Test(
      xnn_f32_velu_ukernel__scalar_rr2_p6_x3,
      xnn_init_f32_elu_scalar_rr2_p6_params);
}
13371 
TEST(F32_VELU__SCALAR_RR2_P6_X3, batch_div_3) {
  // Covers batch sizes 6, 9, ..., 27 (multiples of 3 below 30).
  for (size_t n = 6; n < 30; n += 3) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__scalar_rr2_p6_x3,
        xnn_init_f32_elu_scalar_rr2_p6_params);
  }
}
13379 
TEST(F32_VELU__SCALAR_RR2_P6_X3, batch_lt_3) {
  // Covers batch sizes 1 and 2.
  for (size_t n = 1; n < 3; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__scalar_rr2_p6_x3,
        xnn_init_f32_elu_scalar_rr2_p6_params);
  }
}
13387 
TEST(F32_VELU__SCALAR_RR2_P6_X3, batch_gt_3) {
  // Covers batch sizes 4 and 5.
  for (size_t n = 4; n < 6; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__scalar_rr2_p6_x3,
        xnn_init_f32_elu_scalar_rr2_p6_params);
  }
}
13395 
TEST(F32_VELU__SCALAR_RR2_P6_X3, inplace) {
  // Batch sizes 1, 3, ..., 15 with inplace(true) set on the tester.
  for (size_t n = 1; n <= 15; n += 2) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_velu_ukernel__scalar_rr2_p6_x3,
        xnn_init_f32_elu_scalar_rr2_p6_params);
  }
}
13404 
TEST(F32_VELU__SCALAR_RR2_P6_X3, prescale) {
  // Tries prescale 0.1 and 10.0, each over batch sizes 1, 3, ..., 15.
  const std::vector<float> prescales{0.1f, 10.0f};
  for (const float prescale_value : prescales) {
    for (size_t n = 1; n <= 15; n += 2) {
      VUnaryMicrokernelTester().batch_size(n).prescale(prescale_value).Test(
          xnn_f32_velu_ukernel__scalar_rr2_p6_x3,
          xnn_init_f32_elu_scalar_rr2_p6_params);
    }
  }
}
13415 
TEST(F32_VELU__SCALAR_RR2_P6_X3, alpha) {
  // Tries alpha 0.3 and 3.0, each over batch sizes 1, 3, ..., 15.
  const std::vector<float> alphas{0.3f, 3.0f};
  for (const float alpha_value : alphas) {
    for (size_t n = 1; n <= 15; n += 2) {
      VUnaryMicrokernelTester().batch_size(n).alpha(alpha_value).Test(
          xnn_f32_velu_ukernel__scalar_rr2_p6_x3,
          xnn_init_f32_elu_scalar_rr2_p6_params);
    }
  }
}
13426 
TEST(F32_VELU__SCALAR_RR2_P6_X3, beta) {
  // Tries beta 0.3 and 3.0, each over batch sizes 1, 3, ..., 15.
  const std::vector<float> betas{0.3f, 3.0f};
  for (const float beta_value : betas) {
    for (size_t n = 1; n <= 15; n += 2) {
      VUnaryMicrokernelTester().batch_size(n).beta(beta_value).Test(
          xnn_f32_velu_ukernel__scalar_rr2_p6_x3,
          xnn_init_f32_elu_scalar_rr2_p6_params);
    }
  }
}
13437 
13438 
// Runs xnn_f32_velu_ukernel__scalar_rr2_p6_x4 once on a batch of exactly 4.
TEST(F32_VELU__SCALAR_RR2_P6_X4, batch_eq_4) {
  VUnaryMicrokernelTester().batch_size(4).Test(
      xnn_f32_velu_ukernel__scalar_rr2_p6_x4,
      xnn_init_f32_elu_scalar_rr2_p6_params);
}
13444 
// Runs xnn_f32_velu_ukernel__scalar_rr2_p6_x4 on the multiples of 4 from 8
// up to 36 (2x through 9x).
TEST(F32_VELU__SCALAR_RR2_P6_X4, batch_div_4) {
  constexpr size_t kStep = 4;
  for (size_t n = 2 * kStep; n < 10 * kStep; n += kStep) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__scalar_rr2_p6_x4,
        xnn_init_f32_elu_scalar_rr2_p6_params);
  }
}
13452 
// Runs xnn_f32_velu_ukernel__scalar_rr2_p6_x4 on batch sizes 1, 2 and 3.
TEST(F32_VELU__SCALAR_RR2_P6_X4, batch_lt_4) {
  constexpr size_t kEndBatch = 4;  // exclusive upper bound
  for (size_t n = 1; n < kEndBatch; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__scalar_rr2_p6_x4,
        xnn_init_f32_elu_scalar_rr2_p6_params);
  }
}
13460 
// Runs xnn_f32_velu_ukernel__scalar_rr2_p6_x4 on batch sizes 5, 6 and 7.
TEST(F32_VELU__SCALAR_RR2_P6_X4, batch_gt_4) {
  constexpr size_t kFirstBatch = 5;
  constexpr size_t kEndBatch = 8;  // exclusive upper bound
  for (size_t n = kFirstBatch; n < kEndBatch; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__scalar_rr2_p6_x4,
        xnn_init_f32_elu_scalar_rr2_p6_params);
  }
}
13468 
// Runs xnn_f32_velu_ukernel__scalar_rr2_p6_x4 with inplace(true) on batch
// sizes 1, 4, 7, ..., 19.
TEST(F32_VELU__SCALAR_RR2_P6_X4, inplace) {
  constexpr size_t kMaxBatch = 20;
  constexpr size_t kStride = 3;
  for (size_t n = 1; n <= kMaxBatch; n += kStride) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_velu_ukernel__scalar_rr2_p6_x4,
        xnn_init_f32_elu_scalar_rr2_p6_params);
  }
}
13477 
// Runs xnn_f32_velu_ukernel__scalar_rr2_p6_x4 with prescale 0.1 and 10.0,
// each over batch sizes 1, 4, 7, ..., 19.
TEST(F32_VELU__SCALAR_RR2_P6_X4, prescale) {
  const float kPrescales[] = {0.1f, 10.0f};
  for (const float prescale : kPrescales) {
    for (size_t n = 1; n <= 20; n += 3) {
      VUnaryMicrokernelTester().batch_size(n).prescale(prescale).Test(
          xnn_f32_velu_ukernel__scalar_rr2_p6_x4,
          xnn_init_f32_elu_scalar_rr2_p6_params);
    }
  }
}
13488 
// Runs xnn_f32_velu_ukernel__scalar_rr2_p6_x4 with alpha 0.3 and 3.0,
// each over batch sizes 1, 4, 7, ..., 19.
TEST(F32_VELU__SCALAR_RR2_P6_X4, alpha) {
  const float kAlphas[] = {0.3f, 3.0f};
  for (const float alpha : kAlphas) {
    for (size_t n = 1; n <= 20; n += 3) {
      VUnaryMicrokernelTester().batch_size(n).alpha(alpha).Test(
          xnn_f32_velu_ukernel__scalar_rr2_p6_x4,
          xnn_init_f32_elu_scalar_rr2_p6_params);
    }
  }
}
13499 
// Runs xnn_f32_velu_ukernel__scalar_rr2_p6_x4 with beta 0.3 and 3.0,
// each over batch sizes 1, 4, 7, ..., 19.
TEST(F32_VELU__SCALAR_RR2_P6_X4, beta) {
  const float kBetas[] = {0.3f, 3.0f};
  for (const float beta : kBetas) {
    for (size_t n = 1; n <= 20; n += 3) {
      VUnaryMicrokernelTester().batch_size(n).beta(beta).Test(
          xnn_f32_velu_ukernel__scalar_rr2_p6_x4,
          xnn_init_f32_elu_scalar_rr2_p6_params);
    }
  }
}
13510 
13511 
// Runs xnn_f32_velu_ukernel__scalar_rr2_p6_x5 once on a batch of exactly 5.
TEST(F32_VELU__SCALAR_RR2_P6_X5, batch_eq_5) {
  VUnaryMicrokernelTester().batch_size(5).Test(
      xnn_f32_velu_ukernel__scalar_rr2_p6_x5,
      xnn_init_f32_elu_scalar_rr2_p6_params);
}
13517 
// Runs xnn_f32_velu_ukernel__scalar_rr2_p6_x5 on the multiples of 5 from 10
// up to 45 (2x through 9x).
TEST(F32_VELU__SCALAR_RR2_P6_X5, batch_div_5) {
  constexpr size_t kStep = 5;
  for (size_t n = 2 * kStep; n < 10 * kStep; n += kStep) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__scalar_rr2_p6_x5,
        xnn_init_f32_elu_scalar_rr2_p6_params);
  }
}
13525 
// Runs xnn_f32_velu_ukernel__scalar_rr2_p6_x5 on batch sizes 1 through 4.
TEST(F32_VELU__SCALAR_RR2_P6_X5, batch_lt_5) {
  constexpr size_t kEndBatch = 5;  // exclusive upper bound
  for (size_t n = 1; n < kEndBatch; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__scalar_rr2_p6_x5,
        xnn_init_f32_elu_scalar_rr2_p6_params);
  }
}
13533 
// Runs xnn_f32_velu_ukernel__scalar_rr2_p6_x5 on batch sizes 6 through 9.
TEST(F32_VELU__SCALAR_RR2_P6_X5, batch_gt_5) {
  constexpr size_t kFirstBatch = 6;
  constexpr size_t kEndBatch = 10;  // exclusive upper bound
  for (size_t n = kFirstBatch; n < kEndBatch; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__scalar_rr2_p6_x5,
        xnn_init_f32_elu_scalar_rr2_p6_params);
  }
}
13541 
// Runs xnn_f32_velu_ukernel__scalar_rr2_p6_x5 with inplace(true) on batch
// sizes 1, 5, 9, ..., 25.
TEST(F32_VELU__SCALAR_RR2_P6_X5, inplace) {
  constexpr size_t kMaxBatch = 25;
  constexpr size_t kStride = 4;
  for (size_t n = 1; n <= kMaxBatch; n += kStride) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_velu_ukernel__scalar_rr2_p6_x5,
        xnn_init_f32_elu_scalar_rr2_p6_params);
  }
}
13550 
// Runs xnn_f32_velu_ukernel__scalar_rr2_p6_x5 with prescale 0.1 and 10.0,
// each over batch sizes 1, 5, 9, ..., 25.
TEST(F32_VELU__SCALAR_RR2_P6_X5, prescale) {
  const float kPrescales[] = {0.1f, 10.0f};
  for (const float prescale : kPrescales) {
    for (size_t n = 1; n <= 25; n += 4) {
      VUnaryMicrokernelTester().batch_size(n).prescale(prescale).Test(
          xnn_f32_velu_ukernel__scalar_rr2_p6_x5,
          xnn_init_f32_elu_scalar_rr2_p6_params);
    }
  }
}
13561 
// Runs xnn_f32_velu_ukernel__scalar_rr2_p6_x5 with alpha 0.3 and 3.0,
// each over batch sizes 1, 5, 9, ..., 25.
TEST(F32_VELU__SCALAR_RR2_P6_X5, alpha) {
  const float kAlphas[] = {0.3f, 3.0f};
  for (const float alpha : kAlphas) {
    for (size_t n = 1; n <= 25; n += 4) {
      VUnaryMicrokernelTester().batch_size(n).alpha(alpha).Test(
          xnn_f32_velu_ukernel__scalar_rr2_p6_x5,
          xnn_init_f32_elu_scalar_rr2_p6_params);
    }
  }
}
13572 
// Runs xnn_f32_velu_ukernel__scalar_rr2_p6_x5 with beta 0.3 and 3.0,
// each over batch sizes 1, 5, 9, ..., 25.
TEST(F32_VELU__SCALAR_RR2_P6_X5, beta) {
  const float kBetas[] = {0.3f, 3.0f};
  for (const float beta : kBetas) {
    for (size_t n = 1; n <= 25; n += 4) {
      VUnaryMicrokernelTester().batch_size(n).beta(beta).Test(
          xnn_f32_velu_ukernel__scalar_rr2_p6_x5,
          xnn_init_f32_elu_scalar_rr2_p6_params);
    }
  }
}
13583 
13584 
// Runs xnn_f32_velu_ukernel__scalar_rr2_p6_x6 once on a batch of exactly 6.
TEST(F32_VELU__SCALAR_RR2_P6_X6, batch_eq_6) {
  VUnaryMicrokernelTester().batch_size(6).Test(
      xnn_f32_velu_ukernel__scalar_rr2_p6_x6,
      xnn_init_f32_elu_scalar_rr2_p6_params);
}
13590 
// Runs xnn_f32_velu_ukernel__scalar_rr2_p6_x6 on the multiples of 6 from 12
// up to 54 (2x through 9x).
TEST(F32_VELU__SCALAR_RR2_P6_X6, batch_div_6) {
  constexpr size_t kStep = 6;
  for (size_t n = 2 * kStep; n < 10 * kStep; n += kStep) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__scalar_rr2_p6_x6,
        xnn_init_f32_elu_scalar_rr2_p6_params);
  }
}
13598 
// Runs xnn_f32_velu_ukernel__scalar_rr2_p6_x6 on batch sizes 1 through 5.
TEST(F32_VELU__SCALAR_RR2_P6_X6, batch_lt_6) {
  constexpr size_t kEndBatch = 6;  // exclusive upper bound
  for (size_t n = 1; n < kEndBatch; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__scalar_rr2_p6_x6,
        xnn_init_f32_elu_scalar_rr2_p6_params);
  }
}
13606 
// Runs xnn_f32_velu_ukernel__scalar_rr2_p6_x6 on batch sizes 7 through 11.
TEST(F32_VELU__SCALAR_RR2_P6_X6, batch_gt_6) {
  constexpr size_t kFirstBatch = 7;
  constexpr size_t kEndBatch = 12;  // exclusive upper bound
  for (size_t n = kFirstBatch; n < kEndBatch; ++n) {
    VUnaryMicrokernelTester().batch_size(n).Test(
        xnn_f32_velu_ukernel__scalar_rr2_p6_x6,
        xnn_init_f32_elu_scalar_rr2_p6_params);
  }
}
13614 
// Runs xnn_f32_velu_ukernel__scalar_rr2_p6_x6 with inplace(true) on batch
// sizes 1, 6, 11, ..., 26.
TEST(F32_VELU__SCALAR_RR2_P6_X6, inplace) {
  constexpr size_t kMaxBatch = 30;
  constexpr size_t kStride = 5;
  for (size_t n = 1; n <= kMaxBatch; n += kStride) {
    VUnaryMicrokernelTester().batch_size(n).inplace(true).Test(
        xnn_f32_velu_ukernel__scalar_rr2_p6_x6,
        xnn_init_f32_elu_scalar_rr2_p6_params);
  }
}
13623 
// Runs xnn_f32_velu_ukernel__scalar_rr2_p6_x6 with prescale 0.1 and 10.0,
// each over batch sizes 1, 6, 11, ..., 26.
TEST(F32_VELU__SCALAR_RR2_P6_X6, prescale) {
  const float kPrescales[] = {0.1f, 10.0f};
  for (const float prescale : kPrescales) {
    for (size_t n = 1; n <= 30; n += 5) {
      VUnaryMicrokernelTester().batch_size(n).prescale(prescale).Test(
          xnn_f32_velu_ukernel__scalar_rr2_p6_x6,
          xnn_init_f32_elu_scalar_rr2_p6_params);
    }
  }
}
13634 
// Runs xnn_f32_velu_ukernel__scalar_rr2_p6_x6 with alpha 0.3 and 3.0,
// each over batch sizes 1, 6, 11, ..., 26.
TEST(F32_VELU__SCALAR_RR2_P6_X6, alpha) {
  const float kAlphas[] = {0.3f, 3.0f};
  for (const float alpha : kAlphas) {
    for (size_t n = 1; n <= 30; n += 5) {
      VUnaryMicrokernelTester().batch_size(n).alpha(alpha).Test(
          xnn_f32_velu_ukernel__scalar_rr2_p6_x6,
          xnn_init_f32_elu_scalar_rr2_p6_params);
    }
  }
}
13645 
// Runs xnn_f32_velu_ukernel__scalar_rr2_p6_x6 with beta 0.3 and 3.0,
// each over batch sizes 1, 6, 11, ..., 26.
TEST(F32_VELU__SCALAR_RR2_P6_X6, beta) {
  const float kBetas[] = {0.3f, 3.0f};
  for (const float beta : kBetas) {
    for (size_t n = 1; n <= 30; n += 5) {
      VUnaryMicrokernelTester().batch_size(n).beta(beta).Test(
          xnn_f32_velu_ukernel__scalar_rr2_p6_x6,
          xnn_init_f32_elu_scalar_rr2_p6_params);
    }
  }
}
13656