1 // Copyright 2019 Google LLC
2 //
3 // This source code is licensed under the BSD-style license found in the
4 // LICENSE file in the root directory of this source tree.
5 //
6 // Auto-generated file. Do not edit!
7 // Specification: test/f32-velu.yaml
8 // Generator: tools/generate-vunary-test.py
9
10
11 #include <gtest/gtest.h>
12
13 #include <xnnpack/common.h>
14 #include <xnnpack/isa-checks.h>
15
16 #include <xnnpack/vunary.h>
17 #include "vunary-microkernel-tester.h"
18
19
20 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs the tester with OpType::ELU on a batch of exactly 4 elements.
TEST(F32_VELU__NEON_RR2_LUT16_P3_X4, batch_eq_4) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x4);
  VUnOpMicrokernelTester()
      .batch_size(4)
      .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
}
27
// Sweeps batch sizes 8, 12, ..., 36 (step 4) with OpType::ELU.
TEST(F32_VELU__NEON_RR2_LUT16_P3_X4, batch_div_4) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x4);
  for (size_t n = 8; n < 40; n += 4) {
    VUnOpMicrokernelTester()
        .batch_size(n)
        .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
36
// Sweeps every batch size from 1 through 3 with OpType::ELU.
TEST(F32_VELU__NEON_RR2_LUT16_P3_X4, batch_lt_4) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x4);
  for (size_t n = 1; n < 4; ++n) {
    VUnOpMicrokernelTester()
        .batch_size(n)
        .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
45
// Sweeps every batch size from 5 through 7 with OpType::ELU.
TEST(F32_VELU__NEON_RR2_LUT16_P3_X4, batch_gt_4) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x4);
  for (size_t n = 5; n < 8; ++n) {
    VUnOpMicrokernelTester()
        .batch_size(n)
        .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
54
// Sweeps batch sizes 1, 4, ..., 19 (step 3) with inplace(true) set on the tester.
TEST(F32_VELU__NEON_RR2_LUT16_P3_X4, inplace) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x4);
  for (size_t n = 1; n <= 20; n += 3) {
    VUnOpMicrokernelTester()
        .batch_size(n)
        .inplace(true)
        .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
64
// For each prescale in {0.1, 10.0}, sweeps batch sizes 1, 4, ..., 19 (step 3).
TEST(F32_VELU__NEON_RR2_LUT16_P3_X4, prescale) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x4);
  const float prescale_values[] = {0.1f, 10.0f};
  for (const float prescale_value : prescale_values) {
    for (size_t n = 1; n <= 20; n += 3) {
      VUnOpMicrokernelTester()
          .batch_size(n)
          .prescale(prescale_value)
          .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
76
// For each alpha in {0.3, 3.0}, sweeps batch sizes 1, 4, ..., 19 (step 3).
TEST(F32_VELU__NEON_RR2_LUT16_P3_X4, alpha) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x4);
  const float alpha_values[] = {0.3f, 3.0f};
  for (const float alpha_value : alpha_values) {
    for (size_t n = 1; n <= 20; n += 3) {
      VUnOpMicrokernelTester()
          .batch_size(n)
          .alpha(alpha_value)
          .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
88
// For each beta in {0.3, 3.0}, sweeps batch sizes 1, 4, ..., 19 (step 3).
TEST(F32_VELU__NEON_RR2_LUT16_P3_X4, beta) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x4);
  const float beta_values[] = {0.3f, 3.0f};
  for (const float beta_value : beta_values) {
    for (size_t n = 1; n <= 20; n += 3) {
      VUnOpMicrokernelTester()
          .batch_size(n)
          .beta(beta_value)
          .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
100 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
101
102
103 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs the tester with OpType::ELU on a batch of exactly 8 elements.
TEST(F32_VELU__NEON_RR2_LUT16_P3_X8, batch_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x8);
  VUnOpMicrokernelTester()
      .batch_size(8)
      .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
}
110
// Sweeps batch sizes 16, 24, ..., 72 (step 8) with OpType::ELU.
TEST(F32_VELU__NEON_RR2_LUT16_P3_X8, batch_div_8) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x8);
  for (size_t n = 16; n < 80; n += 8) {
    VUnOpMicrokernelTester()
        .batch_size(n)
        .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
119
// Sweeps every batch size from 1 through 7 with OpType::ELU.
TEST(F32_VELU__NEON_RR2_LUT16_P3_X8, batch_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x8);
  for (size_t n = 1; n < 8; ++n) {
    VUnOpMicrokernelTester()
        .batch_size(n)
        .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
128
// Sweeps every batch size from 9 through 15 with OpType::ELU.
TEST(F32_VELU__NEON_RR2_LUT16_P3_X8, batch_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x8);
  for (size_t n = 9; n < 16; ++n) {
    VUnOpMicrokernelTester()
        .batch_size(n)
        .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
137
// Sweeps batch sizes 1, 8, ..., 36 (step 7) with inplace(true) set on the tester.
TEST(F32_VELU__NEON_RR2_LUT16_P3_X8, inplace) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x8);
  for (size_t n = 1; n <= 40; n += 7) {
    VUnOpMicrokernelTester()
        .batch_size(n)
        .inplace(true)
        .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
147
// For each prescale in {0.1, 10.0}, sweeps batch sizes 1, 8, ..., 36 (step 7).
TEST(F32_VELU__NEON_RR2_LUT16_P3_X8, prescale) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x8);
  const float prescale_values[] = {0.1f, 10.0f};
  for (const float prescale_value : prescale_values) {
    for (size_t n = 1; n <= 40; n += 7) {
      VUnOpMicrokernelTester()
          .batch_size(n)
          .prescale(prescale_value)
          .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
159
// For each alpha in {0.3, 3.0}, sweeps batch sizes 1, 8, ..., 36 (step 7).
TEST(F32_VELU__NEON_RR2_LUT16_P3_X8, alpha) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x8);
  const float alpha_values[] = {0.3f, 3.0f};
  for (const float alpha_value : alpha_values) {
    for (size_t n = 1; n <= 40; n += 7) {
      VUnOpMicrokernelTester()
          .batch_size(n)
          .alpha(alpha_value)
          .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
171
// For each beta in {0.3, 3.0}, sweeps batch sizes 1, 8, ..., 36 (step 7).
TEST(F32_VELU__NEON_RR2_LUT16_P3_X8, beta) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x8);
  const float beta_values[] = {0.3f, 3.0f};
  for (const float beta_value : beta_values) {
    for (size_t n = 1; n <= 40; n += 7) {
      VUnOpMicrokernelTester()
          .batch_size(n)
          .beta(beta_value)
          .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
183 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
184
185
186 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs the tester with OpType::ELU on a batch of exactly 12 elements.
TEST(F32_VELU__NEON_RR2_LUT16_P3_X12, batch_eq_12) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x12);
  VUnOpMicrokernelTester()
      .batch_size(12)
      .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
}
193
// Sweeps batch sizes 24, 36, ..., 108 (step 12) with OpType::ELU.
TEST(F32_VELU__NEON_RR2_LUT16_P3_X12, batch_div_12) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x12);
  for (size_t n = 24; n < 120; n += 12) {
    VUnOpMicrokernelTester()
        .batch_size(n)
        .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
202
// Sweeps every batch size from 1 through 11 with OpType::ELU.
TEST(F32_VELU__NEON_RR2_LUT16_P3_X12, batch_lt_12) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x12);
  for (size_t n = 1; n < 12; ++n) {
    VUnOpMicrokernelTester()
        .batch_size(n)
        .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
211
// Sweeps every batch size from 13 through 23 with OpType::ELU.
TEST(F32_VELU__NEON_RR2_LUT16_P3_X12, batch_gt_12) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x12);
  for (size_t n = 13; n < 24; ++n) {
    VUnOpMicrokernelTester()
        .batch_size(n)
        .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
220
// Sweeps batch sizes 1, 12, ..., 56 (step 11) with inplace(true) set on the tester.
TEST(F32_VELU__NEON_RR2_LUT16_P3_X12, inplace) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x12);
  for (size_t n = 1; n <= 60; n += 11) {
    VUnOpMicrokernelTester()
        .batch_size(n)
        .inplace(true)
        .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
230
// For each prescale in {0.1, 10.0}, sweeps batch sizes 1, 12, ..., 56 (step 11).
TEST(F32_VELU__NEON_RR2_LUT16_P3_X12, prescale) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x12);
  const float prescale_values[] = {0.1f, 10.0f};
  for (const float prescale_value : prescale_values) {
    for (size_t n = 1; n <= 60; n += 11) {
      VUnOpMicrokernelTester()
          .batch_size(n)
          .prescale(prescale_value)
          .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
242
// For each alpha in {0.3, 3.0}, sweeps batch sizes 1, 12, ..., 56 (step 11).
TEST(F32_VELU__NEON_RR2_LUT16_P3_X12, alpha) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x12);
  const float alpha_values[] = {0.3f, 3.0f};
  for (const float alpha_value : alpha_values) {
    for (size_t n = 1; n <= 60; n += 11) {
      VUnOpMicrokernelTester()
          .batch_size(n)
          .alpha(alpha_value)
          .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
254
// For each beta in {0.3, 3.0}, sweeps batch sizes 1, 12, ..., 56 (step 11).
TEST(F32_VELU__NEON_RR2_LUT16_P3_X12, beta) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x12);
  const float beta_values[] = {0.3f, 3.0f};
  for (const float beta_value : beta_values) {
    for (size_t n = 1; n <= 60; n += 11) {
      VUnOpMicrokernelTester()
          .batch_size(n)
          .beta(beta_value)
          .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
266 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
267
268
269 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs the tester with OpType::ELU on a batch of exactly 16 elements.
TEST(F32_VELU__NEON_RR2_LUT16_P3_X16, batch_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x16);
  VUnOpMicrokernelTester()
      .batch_size(16)
      .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
}
276
// Sweeps batch sizes 32, 48, ..., 144 (step 16) with OpType::ELU.
TEST(F32_VELU__NEON_RR2_LUT16_P3_X16, batch_div_16) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x16);
  for (size_t n = 32; n < 160; n += 16) {
    VUnOpMicrokernelTester()
        .batch_size(n)
        .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
285
// Sweeps every batch size from 1 through 15 with OpType::ELU.
TEST(F32_VELU__NEON_RR2_LUT16_P3_X16, batch_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x16);
  for (size_t n = 1; n < 16; ++n) {
    VUnOpMicrokernelTester()
        .batch_size(n)
        .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
294
// Sweeps every batch size from 17 through 31 with OpType::ELU.
TEST(F32_VELU__NEON_RR2_LUT16_P3_X16, batch_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x16);
  for (size_t n = 17; n < 32; ++n) {
    VUnOpMicrokernelTester()
        .batch_size(n)
        .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
303
// Sweeps batch sizes 1, 16, ..., 76 (step 15) with inplace(true) set on the tester.
TEST(F32_VELU__NEON_RR2_LUT16_P3_X16, inplace) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x16);
  for (size_t n = 1; n <= 80; n += 15) {
    VUnOpMicrokernelTester()
        .batch_size(n)
        .inplace(true)
        .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
313
// For each prescale in {0.1, 10.0}, sweeps batch sizes 1, 16, ..., 76 (step 15).
TEST(F32_VELU__NEON_RR2_LUT16_P3_X16, prescale) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x16);
  const float prescale_values[] = {0.1f, 10.0f};
  for (const float prescale_value : prescale_values) {
    for (size_t n = 1; n <= 80; n += 15) {
      VUnOpMicrokernelTester()
          .batch_size(n)
          .prescale(prescale_value)
          .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
325
// For each alpha in {0.3, 3.0}, sweeps batch sizes 1, 16, ..., 76 (step 15).
TEST(F32_VELU__NEON_RR2_LUT16_P3_X16, alpha) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x16);
  const float alpha_values[] = {0.3f, 3.0f};
  for (const float alpha_value : alpha_values) {
    for (size_t n = 1; n <= 80; n += 15) {
      VUnOpMicrokernelTester()
          .batch_size(n)
          .alpha(alpha_value)
          .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
337
// For each beta in {0.3, 3.0}, sweeps batch sizes 1, 16, ..., 76 (step 15).
TEST(F32_VELU__NEON_RR2_LUT16_P3_X16, beta) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x16);
  const float beta_values[] = {0.3f, 3.0f};
  for (const float beta_value : beta_values) {
    for (size_t n = 1; n <= 80; n += 15) {
      VUnOpMicrokernelTester()
          .batch_size(n)
          .beta(beta_value)
          .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
349 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
350
351
352 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs the tester with OpType::ELU on a batch of exactly 20 elements.
TEST(F32_VELU__NEON_RR2_LUT16_P3_X20, batch_eq_20) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x20);
  VUnOpMicrokernelTester()
      .batch_size(20)
      .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
}
359
// Sweeps batch sizes 40, 60, ..., 180 (step 20) with OpType::ELU.
TEST(F32_VELU__NEON_RR2_LUT16_P3_X20, batch_div_20) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x20);
  for (size_t n = 40; n < 200; n += 20) {
    VUnOpMicrokernelTester()
        .batch_size(n)
        .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
368
// Sweeps every batch size from 1 through 19 with OpType::ELU.
TEST(F32_VELU__NEON_RR2_LUT16_P3_X20, batch_lt_20) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x20);
  for (size_t n = 1; n < 20; ++n) {
    VUnOpMicrokernelTester()
        .batch_size(n)
        .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
377
// Sweeps every batch size from 21 through 39 with OpType::ELU.
TEST(F32_VELU__NEON_RR2_LUT16_P3_X20, batch_gt_20) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x20);
  for (size_t n = 21; n < 40; ++n) {
    VUnOpMicrokernelTester()
        .batch_size(n)
        .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
386
// Sweeps batch sizes 1, 20, ..., 96 (step 19) with inplace(true) set on the tester.
TEST(F32_VELU__NEON_RR2_LUT16_P3_X20, inplace) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x20);
  for (size_t n = 1; n <= 100; n += 19) {
    VUnOpMicrokernelTester()
        .batch_size(n)
        .inplace(true)
        .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
396
// For each prescale in {0.1, 10.0}, sweeps batch sizes 1, 20, ..., 96 (step 19).
TEST(F32_VELU__NEON_RR2_LUT16_P3_X20, prescale) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x20);
  const float prescale_values[] = {0.1f, 10.0f};
  for (const float prescale_value : prescale_values) {
    for (size_t n = 1; n <= 100; n += 19) {
      VUnOpMicrokernelTester()
          .batch_size(n)
          .prescale(prescale_value)
          .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
408
// For each alpha in {0.3, 3.0}, sweeps batch sizes 1, 20, ..., 96 (step 19).
TEST(F32_VELU__NEON_RR2_LUT16_P3_X20, alpha) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x20);
  const float alpha_values[] = {0.3f, 3.0f};
  for (const float alpha_value : alpha_values) {
    for (size_t n = 1; n <= 100; n += 19) {
      VUnOpMicrokernelTester()
          .batch_size(n)
          .alpha(alpha_value)
          .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
420
// For each beta in {0.3, 3.0}, sweeps batch sizes 1, 20, ..., 96 (step 19).
TEST(F32_VELU__NEON_RR2_LUT16_P3_X20, beta) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x20);
  const float beta_values[] = {0.3f, 3.0f};
  for (const float beta_value : beta_values) {
    for (size_t n = 1; n <= 100; n += 19) {
      VUnOpMicrokernelTester()
          .batch_size(n)
          .beta(beta_value)
          .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
432 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
433
434
435 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs the tester with OpType::ELU on a batch of exactly 24 elements.
TEST(F32_VELU__NEON_RR2_LUT16_P3_X24, batch_eq_24) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x24);
  VUnOpMicrokernelTester()
      .batch_size(24)
      .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
}
442
// Sweeps batch sizes 48, 72, ..., 216 (step 24) with OpType::ELU.
TEST(F32_VELU__NEON_RR2_LUT16_P3_X24, batch_div_24) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x24);
  for (size_t n = 48; n < 240; n += 24) {
    VUnOpMicrokernelTester()
        .batch_size(n)
        .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
451
// Sweeps every batch size from 1 through 23 with OpType::ELU.
TEST(F32_VELU__NEON_RR2_LUT16_P3_X24, batch_lt_24) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x24);
  for (size_t n = 1; n < 24; ++n) {
    VUnOpMicrokernelTester()
        .batch_size(n)
        .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
460
// Sweeps every batch size from 25 through 47 with OpType::ELU.
TEST(F32_VELU__NEON_RR2_LUT16_P3_X24, batch_gt_24) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x24);
  for (size_t n = 25; n < 48; ++n) {
    VUnOpMicrokernelTester()
        .batch_size(n)
        .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
469
// Sweeps batch sizes 1, 24, ..., 116 (step 23) with inplace(true) set on the tester.
TEST(F32_VELU__NEON_RR2_LUT16_P3_X24, inplace) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x24);
  for (size_t n = 1; n <= 120; n += 23) {
    VUnOpMicrokernelTester()
        .batch_size(n)
        .inplace(true)
        .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
479
// For each prescale in {0.1, 10.0}, sweeps batch sizes 1, 24, ..., 116 (step 23).
TEST(F32_VELU__NEON_RR2_LUT16_P3_X24, prescale) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x24);
  const float prescale_values[] = {0.1f, 10.0f};
  for (const float prescale_value : prescale_values) {
    for (size_t n = 1; n <= 120; n += 23) {
      VUnOpMicrokernelTester()
          .batch_size(n)
          .prescale(prescale_value)
          .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
491
// For each alpha in {0.3, 3.0}, sweeps batch sizes 1, 24, ..., 116 (step 23).
TEST(F32_VELU__NEON_RR2_LUT16_P3_X24, alpha) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x24);
  const float alpha_values[] = {0.3f, 3.0f};
  for (const float alpha_value : alpha_values) {
    for (size_t n = 1; n <= 120; n += 23) {
      VUnOpMicrokernelTester()
          .batch_size(n)
          .alpha(alpha_value)
          .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
503
// For each beta in {0.3, 3.0}, sweeps batch sizes 1, 24, ..., 116 (step 23).
TEST(F32_VELU__NEON_RR2_LUT16_P3_X24, beta) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x24);
  const float beta_values[] = {0.3f, 3.0f};
  for (const float beta_value : beta_values) {
    for (size_t n = 1; n <= 120; n += 23) {
      VUnOpMicrokernelTester()
          .batch_size(n)
          .beta(beta_value)
          .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
515 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
516
517
518 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs the tester with OpType::ELU on a batch of exactly 4 elements.
TEST(F32_VELU__NEON_RR2_P6_X4, batch_eq_4) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neon_rr2_p6_x4);
  VUnOpMicrokernelTester()
      .batch_size(4)
      .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
}
525
// Sweeps batch sizes 8, 12, ..., 36 (step 4) with OpType::ELU.
TEST(F32_VELU__NEON_RR2_P6_X4, batch_div_4) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neon_rr2_p6_x4);
  for (size_t n = 8; n < 40; n += 4) {
    VUnOpMicrokernelTester()
        .batch_size(n)
        .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
534
// Sweeps every batch size from 1 through 3 with OpType::ELU.
TEST(F32_VELU__NEON_RR2_P6_X4, batch_lt_4) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neon_rr2_p6_x4);
  for (size_t n = 1; n < 4; ++n) {
    VUnOpMicrokernelTester()
        .batch_size(n)
        .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
543
// Sweeps every batch size from 5 through 7 with OpType::ELU.
TEST(F32_VELU__NEON_RR2_P6_X4, batch_gt_4) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neon_rr2_p6_x4);
  for (size_t n = 5; n < 8; ++n) {
    VUnOpMicrokernelTester()
        .batch_size(n)
        .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
552
// Sweeps batch sizes 1, 4, ..., 19 (step 3) with inplace(true) set on the tester.
TEST(F32_VELU__NEON_RR2_P6_X4, inplace) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neon_rr2_p6_x4);
  for (size_t n = 1; n <= 20; n += 3) {
    VUnOpMicrokernelTester()
        .batch_size(n)
        .inplace(true)
        .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
562
// For each prescale in {0.1, 10.0}, sweeps batch sizes 1, 4, ..., 19 (step 3).
TEST(F32_VELU__NEON_RR2_P6_X4, prescale) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neon_rr2_p6_x4);
  const float prescale_values[] = {0.1f, 10.0f};
  for (const float prescale_value : prescale_values) {
    for (size_t n = 1; n <= 20; n += 3) {
      VUnOpMicrokernelTester()
          .batch_size(n)
          .prescale(prescale_value)
          .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
574
// For each alpha in {0.3, 3.0}, sweeps batch sizes 1, 4, ..., 19 (step 3).
TEST(F32_VELU__NEON_RR2_P6_X4, alpha) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neon_rr2_p6_x4);
  const float alpha_values[] = {0.3f, 3.0f};
  for (const float alpha_value : alpha_values) {
    for (size_t n = 1; n <= 20; n += 3) {
      VUnOpMicrokernelTester()
          .batch_size(n)
          .alpha(alpha_value)
          .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
586
// For each beta in {0.3, 3.0}, sweeps batch sizes 1, 4, ..., 19 (step 3).
TEST(F32_VELU__NEON_RR2_P6_X4, beta) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neon_rr2_p6_x4);
  const float beta_values[] = {0.3f, 3.0f};
  for (const float beta_value : beta_values) {
    for (size_t n = 1; n <= 20; n += 3) {
      VUnOpMicrokernelTester()
          .batch_size(n)
          .beta(beta_value)
          .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
598 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
599
600
601 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs the tester with OpType::ELU on a batch of exactly 8 elements.
TEST(F32_VELU__NEON_RR2_P6_X8, batch_eq_8) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neon_rr2_p6_x8);
  VUnOpMicrokernelTester()
      .batch_size(8)
      .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
}
608
// Sweeps batch sizes 16, 24, ..., 72 (step 8) with OpType::ELU.
TEST(F32_VELU__NEON_RR2_P6_X8, batch_div_8) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neon_rr2_p6_x8);
  for (size_t n = 16; n < 80; n += 8) {
    VUnOpMicrokernelTester()
        .batch_size(n)
        .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
617
// Sweeps every batch size from 1 through 7 with OpType::ELU.
TEST(F32_VELU__NEON_RR2_P6_X8, batch_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neon_rr2_p6_x8);
  for (size_t n = 1; n < 8; ++n) {
    VUnOpMicrokernelTester()
        .batch_size(n)
        .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
626
// Sweeps every batch size from 9 through 15 with OpType::ELU.
TEST(F32_VELU__NEON_RR2_P6_X8, batch_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neon_rr2_p6_x8);
  for (size_t n = 9; n < 16; ++n) {
    VUnOpMicrokernelTester()
        .batch_size(n)
        .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
635
// Sweeps batch sizes 1, 8, ..., 36 (step 7) with inplace(true) set on the tester.
TEST(F32_VELU__NEON_RR2_P6_X8, inplace) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neon_rr2_p6_x8);
  for (size_t n = 1; n <= 40; n += 7) {
    VUnOpMicrokernelTester()
        .batch_size(n)
        .inplace(true)
        .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
645
// For each prescale in {0.1, 10.0}, sweeps batch sizes 1, 8, ..., 36 (step 7).
TEST(F32_VELU__NEON_RR2_P6_X8, prescale) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neon_rr2_p6_x8);
  const float prescale_values[] = {0.1f, 10.0f};
  for (const float prescale_value : prescale_values) {
    for (size_t n = 1; n <= 40; n += 7) {
      VUnOpMicrokernelTester()
          .batch_size(n)
          .prescale(prescale_value)
          .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
657
// For each alpha in {0.3, 3.0}, sweeps batch sizes 1, 8, ..., 36 (step 7).
TEST(F32_VELU__NEON_RR2_P6_X8, alpha) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neon_rr2_p6_x8);
  const float alpha_values[] = {0.3f, 3.0f};
  for (const float alpha_value : alpha_values) {
    for (size_t n = 1; n <= 40; n += 7) {
      VUnOpMicrokernelTester()
          .batch_size(n)
          .alpha(alpha_value)
          .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
669
// For each beta in {0.3, 3.0}, sweeps batch sizes 1, 8, ..., 36 (step 7).
TEST(F32_VELU__NEON_RR2_P6_X8, beta) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neon_rr2_p6_x8);
  const float beta_values[] = {0.3f, 3.0f};
  for (const float beta_value : beta_values) {
    for (size_t n = 1; n <= 40; n += 7) {
      VUnOpMicrokernelTester()
          .batch_size(n)
          .beta(beta_value)
          .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
681 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
682
683
684 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs the tester with OpType::ELU on a batch of exactly 12 elements.
TEST(F32_VELU__NEON_RR2_P6_X12, batch_eq_12) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neon_rr2_p6_x12);
  VUnOpMicrokernelTester()
      .batch_size(12)
      .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
}
691
// Sweeps batch sizes 24, 36, ..., 108 (step 12) with OpType::ELU.
TEST(F32_VELU__NEON_RR2_P6_X12, batch_div_12) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neon_rr2_p6_x12);
  for (size_t n = 24; n < 120; n += 12) {
    VUnOpMicrokernelTester()
        .batch_size(n)
        .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
700
// Sweeps every batch size from 1 through 11 with OpType::ELU.
TEST(F32_VELU__NEON_RR2_P6_X12, batch_lt_12) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neon_rr2_p6_x12);
  for (size_t n = 1; n < 12; ++n) {
    VUnOpMicrokernelTester()
        .batch_size(n)
        .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
709
// Sweeps every batch size from 13 through 23 with OpType::ELU.
TEST(F32_VELU__NEON_RR2_P6_X12, batch_gt_12) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neon_rr2_p6_x12);
  for (size_t n = 13; n < 24; ++n) {
    VUnOpMicrokernelTester()
        .batch_size(n)
        .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
718
// Sweeps batch sizes 1, 12, ..., 56 (step 11) with inplace(true) set on the tester.
TEST(F32_VELU__NEON_RR2_P6_X12, inplace) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neon_rr2_p6_x12);
  for (size_t n = 1; n <= 60; n += 11) {
    VUnOpMicrokernelTester()
        .batch_size(n)
        .inplace(true)
        .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
728
// For each prescale in {0.1, 10.0}, sweeps batch sizes 1, 12, ..., 56 (step 11).
TEST(F32_VELU__NEON_RR2_P6_X12, prescale) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neon_rr2_p6_x12);
  const float prescale_values[] = {0.1f, 10.0f};
  for (const float prescale_value : prescale_values) {
    for (size_t n = 1; n <= 60; n += 11) {
      VUnOpMicrokernelTester()
          .batch_size(n)
          .prescale(prescale_value)
          .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
740
// For each alpha in {0.3, 3.0}, sweeps batch sizes 1, 12, ..., 56 (step 11).
TEST(F32_VELU__NEON_RR2_P6_X12, alpha) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neon_rr2_p6_x12);
  const float alpha_values[] = {0.3f, 3.0f};
  for (const float alpha_value : alpha_values) {
    for (size_t n = 1; n <= 60; n += 11) {
      VUnOpMicrokernelTester()
          .batch_size(n)
          .alpha(alpha_value)
          .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
752
// For each beta in {0.3, 3.0}, sweeps batch sizes 1, 12, ..., 56 (step 11).
TEST(F32_VELU__NEON_RR2_P6_X12, beta) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neon_rr2_p6_x12);
  const float beta_values[] = {0.3f, 3.0f};
  for (const float beta_value : beta_values) {
    for (size_t n = 1; n <= 60; n += 11) {
      VUnOpMicrokernelTester()
          .batch_size(n)
          .beta(beta_value)
          .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
764 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
765
766
767 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs the tester with OpType::ELU on a batch of exactly 16 elements.
TEST(F32_VELU__NEON_RR2_P6_X16, batch_eq_16) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neon_rr2_p6_x16);
  VUnOpMicrokernelTester()
      .batch_size(16)
      .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
}
774
// Sweeps batch sizes 32, 48, ..., 144 (step 16) with OpType::ELU.
TEST(F32_VELU__NEON_RR2_P6_X16, batch_div_16) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neon_rr2_p6_x16);
  for (size_t n = 32; n < 160; n += 16) {
    VUnOpMicrokernelTester()
        .batch_size(n)
        .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
783
// Sweeps every batch size from 1 through 15 with OpType::ELU.
TEST(F32_VELU__NEON_RR2_P6_X16, batch_lt_16) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neon_rr2_p6_x16);
  for (size_t n = 1; n < 16; ++n) {
    VUnOpMicrokernelTester()
        .batch_size(n)
        .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
792
// Runs the tester in ELU mode for every batch size from 17 through 31.
TEST(F32_VELU__NEON_RR2_P6_X16, batch_gt_16) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neon_rr2_p6_x16);
  for (size_t n = 17; n < 32; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
801
// Runs the tester in ELU mode with inplace(true) for batch sizes 1, 16, ..., 76 (step 15).
TEST(F32_VELU__NEON_RR2_P6_X16, inplace) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neon_rr2_p6_x16);
  for (size_t n = 1; n <= 80; n += 15) {
    VUnOpMicrokernelTester()
        .batch_size(n)
        .inplace(true)
        .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
811
// For prescale values 0.1 and 10, runs the tester in ELU mode for batch sizes 1, 16, ..., 76 (step 15).
TEST(F32_VELU__NEON_RR2_P6_X16, prescale) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neon_rr2_p6_x16);
  for (const float prescale_value : {0.1f, 10.0f}) {
    for (size_t n = 1; n <= 80; n += 15) {
      VUnOpMicrokernelTester()
          .batch_size(n)
          .prescale(prescale_value)
          .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
823
// For alpha values 0.3 and 3, runs the tester in ELU mode for batch sizes 1, 16, ..., 76 (step 15).
TEST(F32_VELU__NEON_RR2_P6_X16, alpha) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neon_rr2_p6_x16);
  for (const float alpha_value : {0.3f, 3.0f}) {
    for (size_t n = 1; n <= 80; n += 15) {
      VUnOpMicrokernelTester()
          .batch_size(n)
          .alpha(alpha_value)
          .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
835
// For beta values 0.3 and 3, runs the tester in ELU mode for batch sizes 1, 16, ..., 76 (step 15).
TEST(F32_VELU__NEON_RR2_P6_X16, beta) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neon_rr2_p6_x16);
  for (const float beta_value : {0.3f, 3.0f}) {
    for (size_t n = 1; n <= 80; n += 15) {
      VUnOpMicrokernelTester()
          .batch_size(n)
          .beta(beta_value)
          .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
847 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
848
849
850 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run of the tester in ELU mode with the batch size fixed at 20.
TEST(F32_VELU__NEON_RR2_P6_X20, batch_eq_20) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neon_rr2_p6_x20);
  VUnOpMicrokernelTester().batch_size(20).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
}
857
// Runs the tester in ELU mode for batch sizes 40, 60, ..., 180 (step 20).
TEST(F32_VELU__NEON_RR2_P6_X20, batch_div_20) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neon_rr2_p6_x20);
  for (size_t n = 40; n < 200; n += 20) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
866
// Runs the tester in ELU mode for every batch size from 1 through 19.
TEST(F32_VELU__NEON_RR2_P6_X20, batch_lt_20) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neon_rr2_p6_x20);
  for (size_t n = 1; n < 20; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
875
// Runs the tester in ELU mode for every batch size from 21 through 39.
TEST(F32_VELU__NEON_RR2_P6_X20, batch_gt_20) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neon_rr2_p6_x20);
  for (size_t n = 21; n < 40; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
884
// Runs the tester in ELU mode with inplace(true) for batch sizes 1, 20, ..., 96 (step 19).
TEST(F32_VELU__NEON_RR2_P6_X20, inplace) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neon_rr2_p6_x20);
  for (size_t n = 1; n <= 100; n += 19) {
    VUnOpMicrokernelTester()
        .batch_size(n)
        .inplace(true)
        .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
894
// For prescale values 0.1 and 10, runs the tester in ELU mode for batch sizes 1, 20, ..., 96 (step 19).
TEST(F32_VELU__NEON_RR2_P6_X20, prescale) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neon_rr2_p6_x20);
  for (const float prescale_value : {0.1f, 10.0f}) {
    for (size_t n = 1; n <= 100; n += 19) {
      VUnOpMicrokernelTester()
          .batch_size(n)
          .prescale(prescale_value)
          .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
906
// For alpha values 0.3 and 3, runs the tester in ELU mode for batch sizes 1, 20, ..., 96 (step 19).
TEST(F32_VELU__NEON_RR2_P6_X20, alpha) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neon_rr2_p6_x20);
  for (const float alpha_value : {0.3f, 3.0f}) {
    for (size_t n = 1; n <= 100; n += 19) {
      VUnOpMicrokernelTester()
          .batch_size(n)
          .alpha(alpha_value)
          .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
918
// For beta values 0.3 and 3, runs the tester in ELU mode for batch sizes 1, 20, ..., 96 (step 19).
TEST(F32_VELU__NEON_RR2_P6_X20, beta) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neon_rr2_p6_x20);
  for (const float beta_value : {0.3f, 3.0f}) {
    for (size_t n = 1; n <= 100; n += 19) {
      VUnOpMicrokernelTester()
          .batch_size(n)
          .beta(beta_value)
          .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
930 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
931
932
933 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run of the tester in ELU mode with the batch size fixed at 24.
TEST(F32_VELU__NEON_RR2_P6_X24, batch_eq_24) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neon_rr2_p6_x24);
  VUnOpMicrokernelTester().batch_size(24).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
}
940
// Runs the tester in ELU mode for batch sizes 48, 72, ..., 216 (step 24).
TEST(F32_VELU__NEON_RR2_P6_X24, batch_div_24) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neon_rr2_p6_x24);
  for (size_t n = 48; n < 240; n += 24) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
949
// Runs the tester in ELU mode for every batch size from 1 through 23.
TEST(F32_VELU__NEON_RR2_P6_X24, batch_lt_24) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neon_rr2_p6_x24);
  for (size_t n = 1; n < 24; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
958
// Runs the tester in ELU mode for every batch size from 25 through 47.
TEST(F32_VELU__NEON_RR2_P6_X24, batch_gt_24) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neon_rr2_p6_x24);
  for (size_t n = 25; n < 48; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
967
// Runs the tester in ELU mode with inplace(true) for batch sizes 1, 24, ..., 116 (step 23).
TEST(F32_VELU__NEON_RR2_P6_X24, inplace) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neon_rr2_p6_x24);
  for (size_t n = 1; n <= 120; n += 23) {
    VUnOpMicrokernelTester()
        .batch_size(n)
        .inplace(true)
        .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
977
// For prescale values 0.1 and 10, runs the tester in ELU mode for batch sizes 1, 24, ..., 116 (step 23).
TEST(F32_VELU__NEON_RR2_P6_X24, prescale) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neon_rr2_p6_x24);
  for (const float prescale_value : {0.1f, 10.0f}) {
    for (size_t n = 1; n <= 120; n += 23) {
      VUnOpMicrokernelTester()
          .batch_size(n)
          .prescale(prescale_value)
          .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
989
// For alpha values 0.3 and 3, runs the tester in ELU mode for batch sizes 1, 24, ..., 116 (step 23).
TEST(F32_VELU__NEON_RR2_P6_X24, alpha) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neon_rr2_p6_x24);
  for (const float alpha_value : {0.3f, 3.0f}) {
    for (size_t n = 1; n <= 120; n += 23) {
      VUnOpMicrokernelTester()
          .batch_size(n)
          .alpha(alpha_value)
          .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
1001
// For beta values 0.3 and 3, runs the tester in ELU mode for batch sizes 1, 24, ..., 116 (step 23).
TEST(F32_VELU__NEON_RR2_P6_X24, beta) {
  TEST_REQUIRES_ARM_NEON;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neon_rr2_p6_x24);
  for (const float beta_value : {0.3f, 3.0f}) {
    for (size_t n = 1; n <= 120; n += 23) {
      VUnOpMicrokernelTester()
          .batch_size(n)
          .beta(beta_value)
          .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
1013 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1014
1015
1016 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run of the tester in ELU mode with the batch size fixed at 4.
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X4, batch_eq_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x4);
  VUnOpMicrokernelTester().batch_size(4).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
}
1023
// Runs the tester in ELU mode for batch sizes 8, 12, ..., 36 (step 4).
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X4, batch_div_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x4);
  for (size_t n = 8; n < 40; n += 4) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
1032
// Runs the tester in ELU mode for every batch size from 1 through 3.
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X4, batch_lt_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x4);
  for (size_t n = 1; n < 4; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
1041
// Runs the tester in ELU mode for every batch size from 5 through 7.
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X4, batch_gt_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x4);
  for (size_t n = 5; n < 8; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
1050
// Runs the tester in ELU mode with inplace(true) for batch sizes 1, 4, ..., 19 (step 3).
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X4, inplace) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x4);
  for (size_t n = 1; n <= 20; n += 3) {
    VUnOpMicrokernelTester()
        .batch_size(n)
        .inplace(true)
        .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
1060
// For prescale values 0.1 and 10, runs the tester in ELU mode for batch sizes 1, 4, ..., 19 (step 3).
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X4, prescale) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x4);
  for (const float prescale_value : {0.1f, 10.0f}) {
    for (size_t n = 1; n <= 20; n += 3) {
      VUnOpMicrokernelTester()
          .batch_size(n)
          .prescale(prescale_value)
          .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
1072
// For alpha values 0.3 and 3, runs the tester in ELU mode for batch sizes 1, 4, ..., 19 (step 3).
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X4, alpha) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x4);
  for (const float alpha_value : {0.3f, 3.0f}) {
    for (size_t n = 1; n <= 20; n += 3) {
      VUnOpMicrokernelTester()
          .batch_size(n)
          .alpha(alpha_value)
          .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
1084
// For beta values 0.3 and 3, runs the tester in ELU mode for batch sizes 1, 4, ..., 19 (step 3).
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X4, beta) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x4);
  for (const float beta_value : {0.3f, 3.0f}) {
    for (size_t n = 1; n <= 20; n += 3) {
      VUnOpMicrokernelTester()
          .batch_size(n)
          .beta(beta_value)
          .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
1096 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1097
1098
1099 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run of the tester in ELU mode with the batch size fixed at 8.
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X8, batch_eq_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x8);
  VUnOpMicrokernelTester().batch_size(8).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
}
1106
// Runs the tester in ELU mode for batch sizes 16, 24, ..., 72 (step 8).
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X8, batch_div_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x8);
  for (size_t n = 16; n < 80; n += 8) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
1115
// Runs the tester in ELU mode for every batch size from 1 through 7.
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X8, batch_lt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x8);
  for (size_t n = 1; n < 8; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
1124
// Runs the tester in ELU mode for every batch size from 9 through 15.
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X8, batch_gt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x8);
  for (size_t n = 9; n < 16; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
1133
// Runs the tester in ELU mode with inplace(true) for batch sizes 1, 8, ..., 36 (step 7).
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X8, inplace) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x8);
  for (size_t n = 1; n <= 40; n += 7) {
    VUnOpMicrokernelTester()
        .batch_size(n)
        .inplace(true)
        .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
1143
// For prescale values 0.1 and 10, runs the tester in ELU mode for batch sizes 1, 8, ..., 36 (step 7).
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X8, prescale) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x8);
  for (const float prescale_value : {0.1f, 10.0f}) {
    for (size_t n = 1; n <= 40; n += 7) {
      VUnOpMicrokernelTester()
          .batch_size(n)
          .prescale(prescale_value)
          .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
1155
// For alpha values 0.3 and 3, runs the tester in ELU mode for batch sizes 1, 8, ..., 36 (step 7).
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X8, alpha) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x8);
  for (const float alpha_value : {0.3f, 3.0f}) {
    for (size_t n = 1; n <= 40; n += 7) {
      VUnOpMicrokernelTester()
          .batch_size(n)
          .alpha(alpha_value)
          .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
1167
// For beta values 0.3 and 3, runs the tester in ELU mode for batch sizes 1, 8, ..., 36 (step 7).
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X8, beta) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x8);
  for (const float beta_value : {0.3f, 3.0f}) {
    for (size_t n = 1; n <= 40; n += 7) {
      VUnOpMicrokernelTester()
          .batch_size(n)
          .beta(beta_value)
          .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
1179 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1180
1181
1182 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run of the tester in ELU mode with the batch size fixed at 12.
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X12, batch_eq_12) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x12);
  VUnOpMicrokernelTester().batch_size(12).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
}
1189
// Runs the tester in ELU mode for batch sizes 24, 36, ..., 108 (step 12).
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X12, batch_div_12) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x12);
  for (size_t n = 24; n < 120; n += 12) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
1198
// Runs the tester in ELU mode for every batch size from 1 through 11.
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X12, batch_lt_12) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x12);
  for (size_t n = 1; n < 12; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
1207
// Runs the tester in ELU mode for every batch size from 13 through 23.
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X12, batch_gt_12) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x12);
  for (size_t n = 13; n < 24; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
1216
// Runs the tester in ELU mode with inplace(true) for batch sizes 1, 12, ..., 56 (step 11).
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X12, inplace) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x12);
  for (size_t n = 1; n <= 60; n += 11) {
    VUnOpMicrokernelTester()
        .batch_size(n)
        .inplace(true)
        .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
1226
// For prescale values 0.1 and 10, runs the tester in ELU mode for batch sizes 1, 12, ..., 56 (step 11).
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X12, prescale) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x12);
  for (const float prescale_value : {0.1f, 10.0f}) {
    for (size_t n = 1; n <= 60; n += 11) {
      VUnOpMicrokernelTester()
          .batch_size(n)
          .prescale(prescale_value)
          .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
1238
// For alpha values 0.3 and 3, runs the tester in ELU mode for batch sizes 1, 12, ..., 56 (step 11).
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X12, alpha) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x12);
  for (const float alpha_value : {0.3f, 3.0f}) {
    for (size_t n = 1; n <= 60; n += 11) {
      VUnOpMicrokernelTester()
          .batch_size(n)
          .alpha(alpha_value)
          .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
1250
// For beta values 0.3 and 3, runs the tester in ELU mode for batch sizes 1, 12, ..., 56 (step 11).
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X12, beta) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x12);
  for (const float beta_value : {0.3f, 3.0f}) {
    for (size_t n = 1; n <= 60; n += 11) {
      VUnOpMicrokernelTester()
          .batch_size(n)
          .beta(beta_value)
          .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
1262 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1263
1264
1265 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run of the tester in ELU mode with the batch size fixed at 16.
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X16, batch_eq_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x16);
  VUnOpMicrokernelTester().batch_size(16).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
}
1272
// Runs the tester in ELU mode for batch sizes 32, 48, ..., 144 (step 16).
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X16, batch_div_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x16);
  for (size_t n = 32; n < 160; n += 16) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
1281
// Runs the tester in ELU mode for every batch size from 1 through 15.
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X16, batch_lt_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x16);
  for (size_t n = 1; n < 16; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
1290
// Runs the tester in ELU mode for every batch size from 17 through 31.
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X16, batch_gt_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x16);
  for (size_t n = 17; n < 32; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
1299
// Runs the tester in ELU mode with inplace(true) for batch sizes 1, 16, ..., 76 (step 15).
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X16, inplace) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x16);
  for (size_t n = 1; n <= 80; n += 15) {
    VUnOpMicrokernelTester()
        .batch_size(n)
        .inplace(true)
        .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
1309
// For prescale values 0.1 and 10, runs the tester in ELU mode for batch sizes 1, 16, ..., 76 (step 15).
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X16, prescale) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x16);
  for (const float prescale_value : {0.1f, 10.0f}) {
    for (size_t n = 1; n <= 80; n += 15) {
      VUnOpMicrokernelTester()
          .batch_size(n)
          .prescale(prescale_value)
          .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
1321
// For alpha values 0.3 and 3, runs the tester in ELU mode for batch sizes 1, 16, ..., 76 (step 15).
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X16, alpha) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x16);
  for (const float alpha_value : {0.3f, 3.0f}) {
    for (size_t n = 1; n <= 80; n += 15) {
      VUnOpMicrokernelTester()
          .batch_size(n)
          .alpha(alpha_value)
          .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
1333
// For beta values 0.3 and 3, runs the tester in ELU mode for batch sizes 1, 16, ..., 76 (step 15).
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X16, beta) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x16);
  for (const float beta_value : {0.3f, 3.0f}) {
    for (size_t n = 1; n <= 80; n += 15) {
      VUnOpMicrokernelTester()
          .batch_size(n)
          .beta(beta_value)
          .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
1345 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1346
1347
1348 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run of the tester in ELU mode with the batch size fixed at 20.
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X20, batch_eq_20) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x20);
  VUnOpMicrokernelTester().batch_size(20).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
}
1355
// Runs the tester in ELU mode for batch sizes 40, 60, ..., 180 (step 20).
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X20, batch_div_20) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x20);
  for (size_t n = 40; n < 200; n += 20) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
1364
// Runs the tester in ELU mode for every batch size from 1 through 19.
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X20, batch_lt_20) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x20);
  for (size_t n = 1; n < 20; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
1373
// Runs the tester in ELU mode for every batch size from 21 through 39.
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X20, batch_gt_20) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x20);
  for (size_t n = 21; n < 40; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
1382
// Runs the tester in ELU mode with inplace(true) for batch sizes 1, 20, ..., 96 (step 19).
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X20, inplace) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x20);
  for (size_t n = 1; n <= 100; n += 19) {
    VUnOpMicrokernelTester()
        .batch_size(n)
        .inplace(true)
        .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
1392
// For prescale values 0.1 and 10, runs the tester in ELU mode for batch sizes 1, 20, ..., 96 (step 19).
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X20, prescale) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x20);
  for (const float prescale_value : {0.1f, 10.0f}) {
    for (size_t n = 1; n <= 100; n += 19) {
      VUnOpMicrokernelTester()
          .batch_size(n)
          .prescale(prescale_value)
          .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
1404
// For alpha values 0.3 and 3, runs the tester in ELU mode for batch sizes 1, 20, ..., 96 (step 19).
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X20, alpha) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x20);
  for (const float alpha_value : {0.3f, 3.0f}) {
    for (size_t n = 1; n <= 100; n += 19) {
      VUnOpMicrokernelTester()
          .batch_size(n)
          .alpha(alpha_value)
          .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
1416
// For beta values 0.3 and 3, runs the tester in ELU mode for batch sizes 1, 20, ..., 96 (step 19).
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X20, beta) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x20);
  for (const float beta_value : {0.3f, 3.0f}) {
    for (size_t n = 1; n <= 100; n += 19) {
      VUnOpMicrokernelTester()
          .batch_size(n)
          .beta(beta_value)
          .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
1428 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1429
1430
1431 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run of the tester in ELU mode with the batch size fixed at 24.
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X24, batch_eq_24) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x24);
  VUnOpMicrokernelTester().batch_size(24).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
}
1438
// Runs the tester in ELU mode for batch sizes 48, 72, ..., 216 (step 24).
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X24, batch_div_24) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x24);
  for (size_t n = 48; n < 240; n += 24) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
1447
// Runs the tester in ELU mode for every batch size from 1 through 23.
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X24, batch_lt_24) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x24);
  for (size_t n = 1; n < 24; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
1456
// Runs the tester in ELU mode for every batch size from 25 through 47.
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X24, batch_gt_24) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x24);
  for (size_t n = 25; n < 48; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
1465
// Runs the tester in ELU mode with inplace(true) for batch sizes 1, 24, ..., 116 (step 23).
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X24, inplace) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x24);
  for (size_t n = 1; n <= 120; n += 23) {
    VUnOpMicrokernelTester()
        .batch_size(n)
        .inplace(true)
        .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
1475
// For prescale values 0.1 and 10, runs the tester in ELU mode for batch sizes 1, 24, ..., 116 (step 23).
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X24, prescale) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x24);
  for (const float prescale_value : {0.1f, 10.0f}) {
    for (size_t n = 1; n <= 120; n += 23) {
      VUnOpMicrokernelTester()
          .batch_size(n)
          .prescale(prescale_value)
          .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
1487
// For alpha values 0.3 and 3, runs the tester in ELU mode for batch sizes 1, 24, ..., 116 (step 23).
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X24, alpha) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x24);
  for (const float alpha_value : {0.3f, 3.0f}) {
    for (size_t n = 1; n <= 120; n += 23) {
      VUnOpMicrokernelTester()
          .batch_size(n)
          .alpha(alpha_value)
          .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
1499
// For beta values 0.3 and 3, runs the tester in ELU mode for batch sizes 1, 24, ..., 116 (step 23).
TEST(F32_VELU__NEONFMA_RR1_LUT16_P3_X24, beta) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x24);
  for (const float beta_value : {0.3f, 3.0f}) {
    for (size_t n = 1; n <= 120; n += 23) {
      VUnOpMicrokernelTester()
          .batch_size(n)
          .beta(beta_value)
          .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
1511 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1512
1513
1514 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Single run of the tester in ELU mode with the batch size fixed at 4.
TEST(F32_VELU__NEONFMA_RR1_P6_X4, batch_eq_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neonfma_rr1_p6_x4);
  VUnOpMicrokernelTester().batch_size(4).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
}
1521
// Runs the tester in ELU mode for batch sizes 8, 12, ..., 36 (step 4).
TEST(F32_VELU__NEONFMA_RR1_P6_X4, batch_div_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neonfma_rr1_p6_x4);
  for (size_t n = 8; n < 40; n += 4) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
1530
// Runs the tester in ELU mode for every batch size from 1 through 3.
TEST(F32_VELU__NEONFMA_RR1_P6_X4, batch_lt_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neonfma_rr1_p6_x4);
  for (size_t n = 1; n < 4; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
1539
// Runs the tester in ELU mode for every batch size from 5 through 7.
TEST(F32_VELU__NEONFMA_RR1_P6_X4, batch_gt_4) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neonfma_rr1_p6_x4);
  for (size_t n = 5; n < 8; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
1548
// Runs the kernel with the tester's in-place mode enabled for batch sizes 1, 4, ..., 19.
TEST(F32_VELU__NEONFMA_RR1_P6_X4, inplace) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neonfma_rr1_p6_x4);
  for (size_t n = 1; n <= 20; n += 3) {
    VUnOpMicrokernelTester().batch_size(n).inplace(true).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
1558
// Runs the kernel with prescale set to 0.1 and 10.0 over batch sizes 1, 4, ..., 19.
TEST(F32_VELU__NEONFMA_RR1_P6_X4, prescale) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neonfma_rr1_p6_x4);
  const std::vector<float> prescales{0.1f, 10.0f};
  for (const float prescale : prescales) {
    for (size_t n = 1; n <= 20; n += 3) {
      VUnOpMicrokernelTester().batch_size(n).prescale(prescale).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
1570
// Runs the kernel with alpha set to 0.3 and 3.0 over batch sizes 1, 4, ..., 19.
TEST(F32_VELU__NEONFMA_RR1_P6_X4, alpha) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neonfma_rr1_p6_x4);
  const std::vector<float> alphas{0.3f, 3.0f};
  for (const float alpha : alphas) {
    for (size_t n = 1; n <= 20; n += 3) {
      VUnOpMicrokernelTester().batch_size(n).alpha(alpha).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
1582
// Runs the kernel with beta set to 0.3 and 3.0 over batch sizes 1, 4, ..., 19.
TEST(F32_VELU__NEONFMA_RR1_P6_X4, beta) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neonfma_rr1_p6_x4);
  const std::vector<float> betas{0.3f, 3.0f};
  for (const float beta : betas) {
    for (size_t n = 1; n <= 20; n += 3) {
      VUnOpMicrokernelTester().batch_size(n).beta(beta).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
1594 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1595
1596
1597 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs the ELU kernel on a batch of exactly 8 elements.
TEST(F32_VELU__NEONFMA_RR1_P6_X8, batch_eq_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neonfma_rr1_p6_x8);
  VUnOpMicrokernelTester().batch_size(8).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
}
1604
// Sweeps batch sizes 16, 24, ..., 72 (multiples of 8).
TEST(F32_VELU__NEONFMA_RR1_P6_X8, batch_div_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neonfma_rr1_p6_x8);
  for (size_t n = 16; n < 80; n += 8) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
1613
// Sweeps every batch size from 1 to 7.
TEST(F32_VELU__NEONFMA_RR1_P6_X8, batch_lt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neonfma_rr1_p6_x8);
  for (size_t n = 1; n < 8; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
1622
// Sweeps every batch size from 9 to 15.
TEST(F32_VELU__NEONFMA_RR1_P6_X8, batch_gt_8) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neonfma_rr1_p6_x8);
  for (size_t n = 9; n < 16; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
1631
// Runs the kernel with the tester's in-place mode enabled for batch sizes 1, 8, ..., 36.
TEST(F32_VELU__NEONFMA_RR1_P6_X8, inplace) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neonfma_rr1_p6_x8);
  for (size_t n = 1; n <= 40; n += 7) {
    VUnOpMicrokernelTester().batch_size(n).inplace(true).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
1641
// Runs the kernel with prescale set to 0.1 and 10.0 over batch sizes 1, 8, ..., 36.
TEST(F32_VELU__NEONFMA_RR1_P6_X8, prescale) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neonfma_rr1_p6_x8);
  const std::vector<float> prescales{0.1f, 10.0f};
  for (const float prescale : prescales) {
    for (size_t n = 1; n <= 40; n += 7) {
      VUnOpMicrokernelTester().batch_size(n).prescale(prescale).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
1653
// Runs the kernel with alpha set to 0.3 and 3.0 over batch sizes 1, 8, ..., 36.
TEST(F32_VELU__NEONFMA_RR1_P6_X8, alpha) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neonfma_rr1_p6_x8);
  const std::vector<float> alphas{0.3f, 3.0f};
  for (const float alpha : alphas) {
    for (size_t n = 1; n <= 40; n += 7) {
      VUnOpMicrokernelTester().batch_size(n).alpha(alpha).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
1665
// Runs the kernel with beta set to 0.3 and 3.0 over batch sizes 1, 8, ..., 36.
TEST(F32_VELU__NEONFMA_RR1_P6_X8, beta) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neonfma_rr1_p6_x8);
  const std::vector<float> betas{0.3f, 3.0f};
  for (const float beta : betas) {
    for (size_t n = 1; n <= 40; n += 7) {
      VUnOpMicrokernelTester().batch_size(n).beta(beta).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
1677 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1678
1679
1680 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs the ELU kernel on a batch of exactly 12 elements.
TEST(F32_VELU__NEONFMA_RR1_P6_X12, batch_eq_12) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neonfma_rr1_p6_x12);
  VUnOpMicrokernelTester().batch_size(12).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
}
1687
// Sweeps batch sizes 24, 36, ..., 108 (multiples of 12).
TEST(F32_VELU__NEONFMA_RR1_P6_X12, batch_div_12) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neonfma_rr1_p6_x12);
  for (size_t n = 24; n < 120; n += 12) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
1696
// Sweeps every batch size from 1 to 11.
TEST(F32_VELU__NEONFMA_RR1_P6_X12, batch_lt_12) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neonfma_rr1_p6_x12);
  for (size_t n = 1; n < 12; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
1705
// Sweeps every batch size from 13 to 23.
TEST(F32_VELU__NEONFMA_RR1_P6_X12, batch_gt_12) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neonfma_rr1_p6_x12);
  for (size_t n = 13; n < 24; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
1714
// Runs the kernel with the tester's in-place mode enabled for batch sizes 1, 12, ..., 56.
TEST(F32_VELU__NEONFMA_RR1_P6_X12, inplace) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neonfma_rr1_p6_x12);
  for (size_t n = 1; n <= 60; n += 11) {
    VUnOpMicrokernelTester().batch_size(n).inplace(true).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
1724
// Runs the kernel with prescale set to 0.1 and 10.0 over batch sizes 1, 12, ..., 56.
TEST(F32_VELU__NEONFMA_RR1_P6_X12, prescale) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neonfma_rr1_p6_x12);
  const std::vector<float> prescales{0.1f, 10.0f};
  for (const float prescale : prescales) {
    for (size_t n = 1; n <= 60; n += 11) {
      VUnOpMicrokernelTester().batch_size(n).prescale(prescale).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
1736
// Runs the kernel with alpha set to 0.3 and 3.0 over batch sizes 1, 12, ..., 56.
TEST(F32_VELU__NEONFMA_RR1_P6_X12, alpha) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neonfma_rr1_p6_x12);
  const std::vector<float> alphas{0.3f, 3.0f};
  for (const float alpha : alphas) {
    for (size_t n = 1; n <= 60; n += 11) {
      VUnOpMicrokernelTester().batch_size(n).alpha(alpha).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
1748
// Runs the kernel with beta set to 0.3 and 3.0 over batch sizes 1, 12, ..., 56.
TEST(F32_VELU__NEONFMA_RR1_P6_X12, beta) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neonfma_rr1_p6_x12);
  const std::vector<float> betas{0.3f, 3.0f};
  for (const float beta : betas) {
    for (size_t n = 1; n <= 60; n += 11) {
      VUnOpMicrokernelTester().batch_size(n).beta(beta).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
1760 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1761
1762
1763 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs the ELU kernel on a batch of exactly 16 elements.
TEST(F32_VELU__NEONFMA_RR1_P6_X16, batch_eq_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neonfma_rr1_p6_x16);
  VUnOpMicrokernelTester().batch_size(16).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
}
1770
// Sweeps batch sizes 32, 48, ..., 144 (multiples of 16).
TEST(F32_VELU__NEONFMA_RR1_P6_X16, batch_div_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neonfma_rr1_p6_x16);
  for (size_t n = 32; n < 160; n += 16) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
1779
// Sweeps every batch size from 1 to 15.
TEST(F32_VELU__NEONFMA_RR1_P6_X16, batch_lt_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neonfma_rr1_p6_x16);
  for (size_t n = 1; n < 16; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
1788
// Sweeps every batch size from 17 to 31.
TEST(F32_VELU__NEONFMA_RR1_P6_X16, batch_gt_16) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neonfma_rr1_p6_x16);
  for (size_t n = 17; n < 32; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
1797
// Runs the kernel with the tester's in-place mode enabled for batch sizes 1, 16, ..., 76.
TEST(F32_VELU__NEONFMA_RR1_P6_X16, inplace) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neonfma_rr1_p6_x16);
  for (size_t n = 1; n <= 80; n += 15) {
    VUnOpMicrokernelTester().batch_size(n).inplace(true).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
1807
// Runs the kernel with prescale set to 0.1 and 10.0 over batch sizes 1, 16, ..., 76.
TEST(F32_VELU__NEONFMA_RR1_P6_X16, prescale) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neonfma_rr1_p6_x16);
  const std::vector<float> prescales{0.1f, 10.0f};
  for (const float prescale : prescales) {
    for (size_t n = 1; n <= 80; n += 15) {
      VUnOpMicrokernelTester().batch_size(n).prescale(prescale).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
1819
// Runs the kernel with alpha set to 0.3 and 3.0 over batch sizes 1, 16, ..., 76.
TEST(F32_VELU__NEONFMA_RR1_P6_X16, alpha) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neonfma_rr1_p6_x16);
  const std::vector<float> alphas{0.3f, 3.0f};
  for (const float alpha : alphas) {
    for (size_t n = 1; n <= 80; n += 15) {
      VUnOpMicrokernelTester().batch_size(n).alpha(alpha).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
1831
// Runs the kernel with beta set to 0.3 and 3.0 over batch sizes 1, 16, ..., 76.
TEST(F32_VELU__NEONFMA_RR1_P6_X16, beta) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neonfma_rr1_p6_x16);
  const std::vector<float> betas{0.3f, 3.0f};
  for (const float beta : betas) {
    for (size_t n = 1; n <= 80; n += 15) {
      VUnOpMicrokernelTester().batch_size(n).beta(beta).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
1843 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1844
1845
1846 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs the ELU kernel on a batch of exactly 20 elements.
TEST(F32_VELU__NEONFMA_RR1_P6_X20, batch_eq_20) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neonfma_rr1_p6_x20);
  VUnOpMicrokernelTester().batch_size(20).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
}
1853
// Sweeps batch sizes 40, 60, ..., 180 (multiples of 20).
TEST(F32_VELU__NEONFMA_RR1_P6_X20, batch_div_20) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neonfma_rr1_p6_x20);
  for (size_t n = 40; n < 200; n += 20) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
1862
// Sweeps every batch size from 1 to 19.
TEST(F32_VELU__NEONFMA_RR1_P6_X20, batch_lt_20) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neonfma_rr1_p6_x20);
  for (size_t n = 1; n < 20; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
1871
// Sweeps every batch size from 21 to 39.
TEST(F32_VELU__NEONFMA_RR1_P6_X20, batch_gt_20) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neonfma_rr1_p6_x20);
  for (size_t n = 21; n < 40; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
1880
// Runs the kernel with the tester's in-place mode enabled for batch sizes 1, 20, ..., 96.
TEST(F32_VELU__NEONFMA_RR1_P6_X20, inplace) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neonfma_rr1_p6_x20);
  for (size_t n = 1; n <= 100; n += 19) {
    VUnOpMicrokernelTester().batch_size(n).inplace(true).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
1890
// Runs the kernel with prescale set to 0.1 and 10.0 over batch sizes 1, 20, ..., 96.
TEST(F32_VELU__NEONFMA_RR1_P6_X20, prescale) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neonfma_rr1_p6_x20);
  const std::vector<float> prescales{0.1f, 10.0f};
  for (const float prescale : prescales) {
    for (size_t n = 1; n <= 100; n += 19) {
      VUnOpMicrokernelTester().batch_size(n).prescale(prescale).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
1902
// Runs the kernel with alpha set to 0.3 and 3.0 over batch sizes 1, 20, ..., 96.
TEST(F32_VELU__NEONFMA_RR1_P6_X20, alpha) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neonfma_rr1_p6_x20);
  const std::vector<float> alphas{0.3f, 3.0f};
  for (const float alpha : alphas) {
    for (size_t n = 1; n <= 100; n += 19) {
      VUnOpMicrokernelTester().batch_size(n).alpha(alpha).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
1914
// Runs the kernel with beta set to 0.3 and 3.0 over batch sizes 1, 20, ..., 96.
TEST(F32_VELU__NEONFMA_RR1_P6_X20, beta) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neonfma_rr1_p6_x20);
  const std::vector<float> betas{0.3f, 3.0f};
  for (const float beta : betas) {
    for (size_t n = 1; n <= 100; n += 19) {
      VUnOpMicrokernelTester().batch_size(n).beta(beta).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
1926 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1927
1928
1929 #if XNN_ARCH_ARM || XNN_ARCH_ARM64
// Runs the ELU kernel on a batch of exactly 24 elements.
TEST(F32_VELU__NEONFMA_RR1_P6_X24, batch_eq_24) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neonfma_rr1_p6_x24);
  VUnOpMicrokernelTester().batch_size(24).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
}
1936
// Sweeps batch sizes 48, 72, ..., 216 (multiples of 24).
TEST(F32_VELU__NEONFMA_RR1_P6_X24, batch_div_24) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neonfma_rr1_p6_x24);
  for (size_t n = 48; n < 240; n += 24) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
1945
// Sweeps every batch size from 1 to 23.
TEST(F32_VELU__NEONFMA_RR1_P6_X24, batch_lt_24) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neonfma_rr1_p6_x24);
  for (size_t n = 1; n < 24; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
1954
// Sweeps every batch size from 25 to 47.
TEST(F32_VELU__NEONFMA_RR1_P6_X24, batch_gt_24) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neonfma_rr1_p6_x24);
  for (size_t n = 25; n < 48; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
1963
// Runs the kernel with the tester's in-place mode enabled for batch sizes 1, 24, ..., 116.
TEST(F32_VELU__NEONFMA_RR1_P6_X24, inplace) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neonfma_rr1_p6_x24);
  for (size_t n = 1; n <= 120; n += 23) {
    VUnOpMicrokernelTester().batch_size(n).inplace(true).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
1973
// Runs the kernel with prescale set to 0.1 and 10.0 over batch sizes 1, 24, ..., 116.
TEST(F32_VELU__NEONFMA_RR1_P6_X24, prescale) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neonfma_rr1_p6_x24);
  const std::vector<float> prescales{0.1f, 10.0f};
  for (const float prescale : prescales) {
    for (size_t n = 1; n <= 120; n += 23) {
      VUnOpMicrokernelTester().batch_size(n).prescale(prescale).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
1985
// Runs the kernel with alpha set to 0.3 and 3.0 over batch sizes 1, 24, ..., 116.
TEST(F32_VELU__NEONFMA_RR1_P6_X24, alpha) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neonfma_rr1_p6_x24);
  const std::vector<float> alphas{0.3f, 3.0f};
  for (const float alpha : alphas) {
    for (size_t n = 1; n <= 120; n += 23) {
      VUnOpMicrokernelTester().batch_size(n).alpha(alpha).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
1997
// Runs the kernel with beta set to 0.3 and 3.0 over batch sizes 1, 24, ..., 116.
TEST(F32_VELU__NEONFMA_RR1_P6_X24, beta) {
  TEST_REQUIRES_ARM_NEON_FMA;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__neonfma_rr1_p6_x24);
  const std::vector<float> betas{0.3f, 3.0f};
  for (const float beta : betas) {
    for (size_t n = 1; n <= 120; n += 23) {
      VUnOpMicrokernelTester().batch_size(n).beta(beta).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
2009 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
2010
2011
2012 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the ELU kernel on a batch of exactly 4 elements.
TEST(F32_VELU__SSE2_RR2_LUT16_P3_X4, batch_eq_4) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x4);
  VUnOpMicrokernelTester().batch_size(4).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
}
2019
// Sweeps batch sizes 8, 12, ..., 36 (multiples of 4).
TEST(F32_VELU__SSE2_RR2_LUT16_P3_X4, batch_div_4) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x4);
  for (size_t n = 8; n < 40; n += 4) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
2028
// Sweeps every batch size from 1 to 3.
TEST(F32_VELU__SSE2_RR2_LUT16_P3_X4, batch_lt_4) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x4);
  for (size_t n = 1; n < 4; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
2037
// Sweeps every batch size from 5 to 7.
TEST(F32_VELU__SSE2_RR2_LUT16_P3_X4, batch_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x4);
  for (size_t n = 5; n < 8; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
2046
// Runs the kernel with the tester's in-place mode enabled for batch sizes 1, 4, ..., 19.
TEST(F32_VELU__SSE2_RR2_LUT16_P3_X4, inplace) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x4);
  for (size_t n = 1; n <= 20; n += 3) {
    VUnOpMicrokernelTester().batch_size(n).inplace(true).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
2056
// Runs the kernel with prescale set to 0.1 and 10.0 over batch sizes 1, 4, ..., 19.
TEST(F32_VELU__SSE2_RR2_LUT16_P3_X4, prescale) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x4);
  const std::vector<float> prescales{0.1f, 10.0f};
  for (const float prescale : prescales) {
    for (size_t n = 1; n <= 20; n += 3) {
      VUnOpMicrokernelTester().batch_size(n).prescale(prescale).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
2068
// Runs the kernel with alpha set to 0.3 and 3.0 over batch sizes 1, 4, ..., 19.
TEST(F32_VELU__SSE2_RR2_LUT16_P3_X4, alpha) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x4);
  const std::vector<float> alphas{0.3f, 3.0f};
  for (const float alpha : alphas) {
    for (size_t n = 1; n <= 20; n += 3) {
      VUnOpMicrokernelTester().batch_size(n).alpha(alpha).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
2080
// Runs the kernel with beta set to 0.3 and 3.0 over batch sizes 1, 4, ..., 19.
TEST(F32_VELU__SSE2_RR2_LUT16_P3_X4, beta) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x4);
  const std::vector<float> betas{0.3f, 3.0f};
  for (const float beta : betas) {
    for (size_t n = 1; n <= 20; n += 3) {
      VUnOpMicrokernelTester().batch_size(n).beta(beta).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
2092 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
2093
2094
2095 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the ELU kernel on a batch of exactly 8 elements.
TEST(F32_VELU__SSE2_RR2_LUT16_P3_X8, batch_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x8);
  VUnOpMicrokernelTester().batch_size(8).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
}
2102
// Sweeps batch sizes 16, 24, ..., 72 (multiples of 8).
TEST(F32_VELU__SSE2_RR2_LUT16_P3_X8, batch_div_8) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x8);
  for (size_t n = 16; n < 80; n += 8) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
2111
// Sweeps every batch size from 1 to 7.
TEST(F32_VELU__SSE2_RR2_LUT16_P3_X8, batch_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x8);
  for (size_t n = 1; n < 8; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
2120
// Sweeps every batch size from 9 to 15.
TEST(F32_VELU__SSE2_RR2_LUT16_P3_X8, batch_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x8);
  for (size_t n = 9; n < 16; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
2129
// Runs the kernel with the tester's in-place mode enabled for batch sizes 1, 8, ..., 36.
TEST(F32_VELU__SSE2_RR2_LUT16_P3_X8, inplace) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x8);
  for (size_t n = 1; n <= 40; n += 7) {
    VUnOpMicrokernelTester().batch_size(n).inplace(true).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
2139
// Runs the kernel with prescale set to 0.1 and 10.0 over batch sizes 1, 8, ..., 36.
TEST(F32_VELU__SSE2_RR2_LUT16_P3_X8, prescale) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x8);
  const std::vector<float> prescales{0.1f, 10.0f};
  for (const float prescale : prescales) {
    for (size_t n = 1; n <= 40; n += 7) {
      VUnOpMicrokernelTester().batch_size(n).prescale(prescale).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
2151
// Runs the kernel with alpha set to 0.3 and 3.0 over batch sizes 1, 8, ..., 36.
TEST(F32_VELU__SSE2_RR2_LUT16_P3_X8, alpha) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x8);
  const std::vector<float> alphas{0.3f, 3.0f};
  for (const float alpha : alphas) {
    for (size_t n = 1; n <= 40; n += 7) {
      VUnOpMicrokernelTester().batch_size(n).alpha(alpha).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
2163
// Runs the kernel with beta set to 0.3 and 3.0 over batch sizes 1, 8, ..., 36.
TEST(F32_VELU__SSE2_RR2_LUT16_P3_X8, beta) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x8);
  const std::vector<float> betas{0.3f, 3.0f};
  for (const float beta : betas) {
    for (size_t n = 1; n <= 40; n += 7) {
      VUnOpMicrokernelTester().batch_size(n).beta(beta).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
2175 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
2176
2177
2178 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the ELU kernel on a batch of exactly 12 elements.
TEST(F32_VELU__SSE2_RR2_LUT16_P3_X12, batch_eq_12) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x12);
  VUnOpMicrokernelTester().batch_size(12).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
}
2185
// Sweeps batch sizes 24, 36, ..., 108 (multiples of 12).
TEST(F32_VELU__SSE2_RR2_LUT16_P3_X12, batch_div_12) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x12);
  for (size_t n = 24; n < 120; n += 12) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
2194
// Sweeps every batch size from 1 to 11.
TEST(F32_VELU__SSE2_RR2_LUT16_P3_X12, batch_lt_12) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x12);
  for (size_t n = 1; n < 12; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
2203
// Sweeps every batch size from 13 to 23.
TEST(F32_VELU__SSE2_RR2_LUT16_P3_X12, batch_gt_12) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x12);
  for (size_t n = 13; n < 24; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
2212
// Runs the kernel with the tester's in-place mode enabled for batch sizes 1, 12, ..., 56.
TEST(F32_VELU__SSE2_RR2_LUT16_P3_X12, inplace) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x12);
  for (size_t n = 1; n <= 60; n += 11) {
    VUnOpMicrokernelTester().batch_size(n).inplace(true).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
2222
// Runs the kernel with prescale set to 0.1 and 10.0 over batch sizes 1, 12, ..., 56.
TEST(F32_VELU__SSE2_RR2_LUT16_P3_X12, prescale) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x12);
  const std::vector<float> prescales{0.1f, 10.0f};
  for (const float prescale : prescales) {
    for (size_t n = 1; n <= 60; n += 11) {
      VUnOpMicrokernelTester().batch_size(n).prescale(prescale).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
2234
// Runs the kernel with alpha set to 0.3 and 3.0 over batch sizes 1, 12, ..., 56.
TEST(F32_VELU__SSE2_RR2_LUT16_P3_X12, alpha) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x12);
  const std::vector<float> alphas{0.3f, 3.0f};
  for (const float alpha : alphas) {
    for (size_t n = 1; n <= 60; n += 11) {
      VUnOpMicrokernelTester().batch_size(n).alpha(alpha).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
2246
// Runs the kernel with beta set to 0.3 and 3.0 over batch sizes 1, 12, ..., 56.
TEST(F32_VELU__SSE2_RR2_LUT16_P3_X12, beta) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x12);
  const std::vector<float> betas{0.3f, 3.0f};
  for (const float beta : betas) {
    for (size_t n = 1; n <= 60; n += 11) {
      VUnOpMicrokernelTester().batch_size(n).beta(beta).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
2258 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
2259
2260
2261 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the ELU kernel on a batch of exactly 16 elements.
TEST(F32_VELU__SSE2_RR2_LUT16_P3_X16, batch_eq_16) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x16);
  VUnOpMicrokernelTester().batch_size(16).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
}
2268
// Sweeps batch sizes 32, 48, ..., 144 (multiples of 16).
TEST(F32_VELU__SSE2_RR2_LUT16_P3_X16, batch_div_16) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x16);
  for (size_t n = 32; n < 160; n += 16) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
2277
// Sweeps every batch size from 1 to 15.
TEST(F32_VELU__SSE2_RR2_LUT16_P3_X16, batch_lt_16) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x16);
  for (size_t n = 1; n < 16; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
2286
// Sweeps every batch size from 17 to 31.
TEST(F32_VELU__SSE2_RR2_LUT16_P3_X16, batch_gt_16) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x16);
  for (size_t n = 17; n < 32; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
2295
// Runs the kernel with the tester's in-place mode enabled for batch sizes 1, 16, ..., 76.
TEST(F32_VELU__SSE2_RR2_LUT16_P3_X16, inplace) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x16);
  for (size_t n = 1; n <= 80; n += 15) {
    VUnOpMicrokernelTester().batch_size(n).inplace(true).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
2305
TEST(F32_VELU__SSE2_RR2_LUT16_P3_X16,prescale)2306 TEST(F32_VELU__SSE2_RR2_LUT16_P3_X16, prescale) {
2307 TEST_REQUIRES_X86_SSE2;
2308 for (float prescale : std::vector<float>({0.1f, 10.0f})) {
2309 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
2310 VUnOpMicrokernelTester()
2311 .batch_size(batch_size)
2312 .prescale(prescale)
2313 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x16), VUnOpMicrokernelTester::OpType::ELU);
2314 }
2315 }
2316 }
2317
TEST(F32_VELU__SSE2_RR2_LUT16_P3_X16,alpha)2318 TEST(F32_VELU__SSE2_RR2_LUT16_P3_X16, alpha) {
2319 TEST_REQUIRES_X86_SSE2;
2320 for (float alpha : std::vector<float>({0.3f, 3.0f})) {
2321 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
2322 VUnOpMicrokernelTester()
2323 .batch_size(batch_size)
2324 .alpha(alpha)
2325 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x16), VUnOpMicrokernelTester::OpType::ELU);
2326 }
2327 }
2328 }
2329
TEST(F32_VELU__SSE2_RR2_LUT16_P3_X16,beta)2330 TEST(F32_VELU__SSE2_RR2_LUT16_P3_X16, beta) {
2331 TEST_REQUIRES_X86_SSE2;
2332 for (float beta : std::vector<float>({0.3f, 3.0f})) {
2333 for (size_t batch_size = 1; batch_size <= 80; batch_size += 15) {
2334 VUnOpMicrokernelTester()
2335 .batch_size(batch_size)
2336 .beta(beta)
2337 .Test(xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x16), VUnOpMicrokernelTester::OpType::ELU);
2338 }
2339 }
2340 }
2341 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
2342
2343
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
// ELU tests for xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x20. Every test starts with
// TEST_REQUIRES_X86_SSE2 and then drives the kernel through VUnOpMicrokernelTester with OpType::ELU.
// NOTE(review): the macro presumably skips the test when SSE2 is unavailable; confirm in xnnpack/isa-checks.h.

// Single run with batch_size == 20.
TEST(F32_VELU__SSE2_RR2_LUT16_P3_X20, batch_eq_20) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x20);
  VUnOpMicrokernelTester().batch_size(20).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
}

// Batch sizes 40, 60, ... below 200, in steps of 20.
TEST(F32_VELU__SSE2_RR2_LUT16_P3_X20, batch_div_20) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x20);
  for (size_t n = 40; n < 200; n += 20) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}

// Every batch size from 1 through 19.
TEST(F32_VELU__SSE2_RR2_LUT16_P3_X20, batch_lt_20) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x20);
  for (size_t n = 1; n < 20; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}

// Every batch size from 21 through 39.
TEST(F32_VELU__SSE2_RR2_LUT16_P3_X20, batch_gt_20) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x20);
  for (size_t n = 21; n < 40; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}

// Batch sizes 1, 20, 39, ... up to 100, with inplace(true) set on the tester.
TEST(F32_VELU__SSE2_RR2_LUT16_P3_X20, inplace) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x20);
  for (size_t n = 1; n <= 100; n += 19) {
    VUnOpMicrokernelTester().batch_size(n).inplace(true).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}

// Two prescale settings, each swept over batch sizes 1, 20, 39, ... up to 100.
TEST(F32_VELU__SSE2_RR2_LUT16_P3_X20, prescale) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x20);
  const std::vector<float> prescales{0.1f, 10.0f};
  for (float value : prescales) {
    for (size_t n = 1; n <= 100; n += 19) {
      VUnOpMicrokernelTester().batch_size(n).prescale(value).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}

// Two alpha settings, each swept over batch sizes 1, 20, 39, ... up to 100.
TEST(F32_VELU__SSE2_RR2_LUT16_P3_X20, alpha) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x20);
  const std::vector<float> alphas{0.3f, 3.0f};
  for (float value : alphas) {
    for (size_t n = 1; n <= 100; n += 19) {
      VUnOpMicrokernelTester().batch_size(n).alpha(value).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}

// Two beta settings, each swept over batch sizes 1, 20, 39, ... up to 100.
TEST(F32_VELU__SSE2_RR2_LUT16_P3_X20, beta) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x20);
  const std::vector<float> betas{0.3f, 3.0f};
  for (float value : betas) {
    for (size_t n = 1; n <= 100; n += 19) {
      VUnOpMicrokernelTester().batch_size(n).beta(value).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
2425
2426
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
// ELU tests for xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x24. Every test starts with
// TEST_REQUIRES_X86_SSE2 and then drives the kernel through VUnOpMicrokernelTester with OpType::ELU.
// NOTE(review): the macro presumably skips the test when SSE2 is unavailable; confirm in xnnpack/isa-checks.h.

// Single run with batch_size == 24.
TEST(F32_VELU__SSE2_RR2_LUT16_P3_X24, batch_eq_24) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x24);
  VUnOpMicrokernelTester().batch_size(24).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
}

// Batch sizes 48, 72, ... below 240, in steps of 24.
TEST(F32_VELU__SSE2_RR2_LUT16_P3_X24, batch_div_24) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x24);
  for (size_t n = 48; n < 240; n += 24) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}

// Every batch size from 1 through 23.
TEST(F32_VELU__SSE2_RR2_LUT16_P3_X24, batch_lt_24) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x24);
  for (size_t n = 1; n < 24; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}

// Every batch size from 25 through 47.
TEST(F32_VELU__SSE2_RR2_LUT16_P3_X24, batch_gt_24) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x24);
  for (size_t n = 25; n < 48; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}

// Batch sizes 1, 24, 47, ... up to 120, with inplace(true) set on the tester.
TEST(F32_VELU__SSE2_RR2_LUT16_P3_X24, inplace) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x24);
  for (size_t n = 1; n <= 120; n += 23) {
    VUnOpMicrokernelTester().batch_size(n).inplace(true).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}

// Two prescale settings, each swept over batch sizes 1, 24, 47, ... up to 120.
TEST(F32_VELU__SSE2_RR2_LUT16_P3_X24, prescale) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x24);
  const std::vector<float> prescales{0.1f, 10.0f};
  for (float value : prescales) {
    for (size_t n = 1; n <= 120; n += 23) {
      VUnOpMicrokernelTester().batch_size(n).prescale(value).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}

// Two alpha settings, each swept over batch sizes 1, 24, 47, ... up to 120.
TEST(F32_VELU__SSE2_RR2_LUT16_P3_X24, alpha) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x24);
  const std::vector<float> alphas{0.3f, 3.0f};
  for (float value : alphas) {
    for (size_t n = 1; n <= 120; n += 23) {
      VUnOpMicrokernelTester().batch_size(n).alpha(value).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}

// Two beta settings, each swept over batch sizes 1, 24, 47, ... up to 120.
TEST(F32_VELU__SSE2_RR2_LUT16_P3_X24, beta) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x24);
  const std::vector<float> betas{0.3f, 3.0f};
  for (float value : betas) {
    for (size_t n = 1; n <= 120; n += 23) {
      VUnOpMicrokernelTester().batch_size(n).beta(value).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
2508
2509
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
// ELU tests for xnn_f32_velu_ukernel__sse2_rr2_p6_x4. Every test starts with
// TEST_REQUIRES_X86_SSE2 and then drives the kernel through VUnOpMicrokernelTester with OpType::ELU.
// NOTE(review): the macro presumably skips the test when SSE2 is unavailable; confirm in xnnpack/isa-checks.h.

// Single run with batch_size == 4.
TEST(F32_VELU__SSE2_RR2_P6_X4, batch_eq_4) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse2_rr2_p6_x4);
  VUnOpMicrokernelTester().batch_size(4).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
}

// Batch sizes 8, 12, ... below 40, in steps of 4.
TEST(F32_VELU__SSE2_RR2_P6_X4, batch_div_4) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse2_rr2_p6_x4);
  for (size_t n = 8; n < 40; n += 4) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}

// Every batch size from 1 through 3.
TEST(F32_VELU__SSE2_RR2_P6_X4, batch_lt_4) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse2_rr2_p6_x4);
  for (size_t n = 1; n < 4; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}

// Every batch size from 5 through 7.
TEST(F32_VELU__SSE2_RR2_P6_X4, batch_gt_4) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse2_rr2_p6_x4);
  for (size_t n = 5; n < 8; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}

// Batch sizes 1, 4, 7, ... up to 20, with inplace(true) set on the tester.
TEST(F32_VELU__SSE2_RR2_P6_X4, inplace) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse2_rr2_p6_x4);
  for (size_t n = 1; n <= 20; n += 3) {
    VUnOpMicrokernelTester().batch_size(n).inplace(true).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}

// Two prescale settings, each swept over batch sizes 1, 4, 7, ... up to 20.
TEST(F32_VELU__SSE2_RR2_P6_X4, prescale) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse2_rr2_p6_x4);
  const std::vector<float> prescales{0.1f, 10.0f};
  for (float value : prescales) {
    for (size_t n = 1; n <= 20; n += 3) {
      VUnOpMicrokernelTester().batch_size(n).prescale(value).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}

// Two alpha settings, each swept over batch sizes 1, 4, 7, ... up to 20.
TEST(F32_VELU__SSE2_RR2_P6_X4, alpha) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse2_rr2_p6_x4);
  const std::vector<float> alphas{0.3f, 3.0f};
  for (float value : alphas) {
    for (size_t n = 1; n <= 20; n += 3) {
      VUnOpMicrokernelTester().batch_size(n).alpha(value).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}

// Two beta settings, each swept over batch sizes 1, 4, 7, ... up to 20.
TEST(F32_VELU__SSE2_RR2_P6_X4, beta) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse2_rr2_p6_x4);
  const std::vector<float> betas{0.3f, 3.0f};
  for (float value : betas) {
    for (size_t n = 1; n <= 20; n += 3) {
      VUnOpMicrokernelTester().batch_size(n).beta(value).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
2591
2592
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
// ELU tests for xnn_f32_velu_ukernel__sse2_rr2_p6_x8. Every test starts with
// TEST_REQUIRES_X86_SSE2 and then drives the kernel through VUnOpMicrokernelTester with OpType::ELU.
// NOTE(review): the macro presumably skips the test when SSE2 is unavailable; confirm in xnnpack/isa-checks.h.

// Single run with batch_size == 8.
TEST(F32_VELU__SSE2_RR2_P6_X8, batch_eq_8) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse2_rr2_p6_x8);
  VUnOpMicrokernelTester().batch_size(8).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
}

// Batch sizes 16, 24, ... below 80, in steps of 8.
TEST(F32_VELU__SSE2_RR2_P6_X8, batch_div_8) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse2_rr2_p6_x8);
  for (size_t n = 16; n < 80; n += 8) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}

// Every batch size from 1 through 7.
TEST(F32_VELU__SSE2_RR2_P6_X8, batch_lt_8) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse2_rr2_p6_x8);
  for (size_t n = 1; n < 8; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}

// Every batch size from 9 through 15.
TEST(F32_VELU__SSE2_RR2_P6_X8, batch_gt_8) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse2_rr2_p6_x8);
  for (size_t n = 9; n < 16; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}

// Batch sizes 1, 8, 15, ... up to 40, with inplace(true) set on the tester.
TEST(F32_VELU__SSE2_RR2_P6_X8, inplace) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse2_rr2_p6_x8);
  for (size_t n = 1; n <= 40; n += 7) {
    VUnOpMicrokernelTester().batch_size(n).inplace(true).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}

// Two prescale settings, each swept over batch sizes 1, 8, 15, ... up to 40.
TEST(F32_VELU__SSE2_RR2_P6_X8, prescale) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse2_rr2_p6_x8);
  const std::vector<float> prescales{0.1f, 10.0f};
  for (float value : prescales) {
    for (size_t n = 1; n <= 40; n += 7) {
      VUnOpMicrokernelTester().batch_size(n).prescale(value).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}

// Two alpha settings, each swept over batch sizes 1, 8, 15, ... up to 40.
TEST(F32_VELU__SSE2_RR2_P6_X8, alpha) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse2_rr2_p6_x8);
  const std::vector<float> alphas{0.3f, 3.0f};
  for (float value : alphas) {
    for (size_t n = 1; n <= 40; n += 7) {
      VUnOpMicrokernelTester().batch_size(n).alpha(value).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}

// Two beta settings, each swept over batch sizes 1, 8, 15, ... up to 40.
TEST(F32_VELU__SSE2_RR2_P6_X8, beta) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse2_rr2_p6_x8);
  const std::vector<float> betas{0.3f, 3.0f};
  for (float value : betas) {
    for (size_t n = 1; n <= 40; n += 7) {
      VUnOpMicrokernelTester().batch_size(n).beta(value).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
2674
2675
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
// ELU tests for xnn_f32_velu_ukernel__sse2_rr2_p6_x12. Every test starts with
// TEST_REQUIRES_X86_SSE2 and then drives the kernel through VUnOpMicrokernelTester with OpType::ELU.
// NOTE(review): the macro presumably skips the test when SSE2 is unavailable; confirm in xnnpack/isa-checks.h.

// Single run with batch_size == 12.
TEST(F32_VELU__SSE2_RR2_P6_X12, batch_eq_12) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse2_rr2_p6_x12);
  VUnOpMicrokernelTester().batch_size(12).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
}

// Batch sizes 24, 36, ... below 120, in steps of 12.
TEST(F32_VELU__SSE2_RR2_P6_X12, batch_div_12) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse2_rr2_p6_x12);
  for (size_t n = 24; n < 120; n += 12) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}

// Every batch size from 1 through 11.
TEST(F32_VELU__SSE2_RR2_P6_X12, batch_lt_12) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse2_rr2_p6_x12);
  for (size_t n = 1; n < 12; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}

// Every batch size from 13 through 23.
TEST(F32_VELU__SSE2_RR2_P6_X12, batch_gt_12) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse2_rr2_p6_x12);
  for (size_t n = 13; n < 24; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}

// Batch sizes 1, 12, 23, ... up to 60, with inplace(true) set on the tester.
TEST(F32_VELU__SSE2_RR2_P6_X12, inplace) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse2_rr2_p6_x12);
  for (size_t n = 1; n <= 60; n += 11) {
    VUnOpMicrokernelTester().batch_size(n).inplace(true).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}

// Two prescale settings, each swept over batch sizes 1, 12, 23, ... up to 60.
TEST(F32_VELU__SSE2_RR2_P6_X12, prescale) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse2_rr2_p6_x12);
  const std::vector<float> prescales{0.1f, 10.0f};
  for (float value : prescales) {
    for (size_t n = 1; n <= 60; n += 11) {
      VUnOpMicrokernelTester().batch_size(n).prescale(value).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}

// Two alpha settings, each swept over batch sizes 1, 12, 23, ... up to 60.
TEST(F32_VELU__SSE2_RR2_P6_X12, alpha) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse2_rr2_p6_x12);
  const std::vector<float> alphas{0.3f, 3.0f};
  for (float value : alphas) {
    for (size_t n = 1; n <= 60; n += 11) {
      VUnOpMicrokernelTester().batch_size(n).alpha(value).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}

// Two beta settings, each swept over batch sizes 1, 12, 23, ... up to 60.
TEST(F32_VELU__SSE2_RR2_P6_X12, beta) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse2_rr2_p6_x12);
  const std::vector<float> betas{0.3f, 3.0f};
  for (float value : betas) {
    for (size_t n = 1; n <= 60; n += 11) {
      VUnOpMicrokernelTester().batch_size(n).beta(value).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
2757
2758
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
// ELU tests for xnn_f32_velu_ukernel__sse2_rr2_p6_x16. Every test starts with
// TEST_REQUIRES_X86_SSE2 and then drives the kernel through VUnOpMicrokernelTester with OpType::ELU.
// NOTE(review): the macro presumably skips the test when SSE2 is unavailable; confirm in xnnpack/isa-checks.h.

// Single run with batch_size == 16.
TEST(F32_VELU__SSE2_RR2_P6_X16, batch_eq_16) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse2_rr2_p6_x16);
  VUnOpMicrokernelTester().batch_size(16).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
}

// Batch sizes 32, 48, ... below 160, in steps of 16.
TEST(F32_VELU__SSE2_RR2_P6_X16, batch_div_16) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse2_rr2_p6_x16);
  for (size_t n = 32; n < 160; n += 16) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}

// Every batch size from 1 through 15.
TEST(F32_VELU__SSE2_RR2_P6_X16, batch_lt_16) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse2_rr2_p6_x16);
  for (size_t n = 1; n < 16; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}

// Every batch size from 17 through 31.
TEST(F32_VELU__SSE2_RR2_P6_X16, batch_gt_16) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse2_rr2_p6_x16);
  for (size_t n = 17; n < 32; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}

// Batch sizes 1, 16, 31, ... up to 80, with inplace(true) set on the tester.
TEST(F32_VELU__SSE2_RR2_P6_X16, inplace) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse2_rr2_p6_x16);
  for (size_t n = 1; n <= 80; n += 15) {
    VUnOpMicrokernelTester().batch_size(n).inplace(true).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}

// Two prescale settings, each swept over batch sizes 1, 16, 31, ... up to 80.
TEST(F32_VELU__SSE2_RR2_P6_X16, prescale) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse2_rr2_p6_x16);
  const std::vector<float> prescales{0.1f, 10.0f};
  for (float value : prescales) {
    for (size_t n = 1; n <= 80; n += 15) {
      VUnOpMicrokernelTester().batch_size(n).prescale(value).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}

// Two alpha settings, each swept over batch sizes 1, 16, 31, ... up to 80.
TEST(F32_VELU__SSE2_RR2_P6_X16, alpha) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse2_rr2_p6_x16);
  const std::vector<float> alphas{0.3f, 3.0f};
  for (float value : alphas) {
    for (size_t n = 1; n <= 80; n += 15) {
      VUnOpMicrokernelTester().batch_size(n).alpha(value).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}

// Two beta settings, each swept over batch sizes 1, 16, 31, ... up to 80.
TEST(F32_VELU__SSE2_RR2_P6_X16, beta) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse2_rr2_p6_x16);
  const std::vector<float> betas{0.3f, 3.0f};
  for (float value : betas) {
    for (size_t n = 1; n <= 80; n += 15) {
      VUnOpMicrokernelTester().batch_size(n).beta(value).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
2840
2841
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
// ELU tests for xnn_f32_velu_ukernel__sse2_rr2_p6_x20. Every test starts with
// TEST_REQUIRES_X86_SSE2 and then drives the kernel through VUnOpMicrokernelTester with OpType::ELU.
// NOTE(review): the macro presumably skips the test when SSE2 is unavailable; confirm in xnnpack/isa-checks.h.

// Single run with batch_size == 20.
TEST(F32_VELU__SSE2_RR2_P6_X20, batch_eq_20) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse2_rr2_p6_x20);
  VUnOpMicrokernelTester().batch_size(20).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
}

// Batch sizes 40, 60, ... below 200, in steps of 20.
TEST(F32_VELU__SSE2_RR2_P6_X20, batch_div_20) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse2_rr2_p6_x20);
  for (size_t n = 40; n < 200; n += 20) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}

// Every batch size from 1 through 19.
TEST(F32_VELU__SSE2_RR2_P6_X20, batch_lt_20) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse2_rr2_p6_x20);
  for (size_t n = 1; n < 20; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}

// Every batch size from 21 through 39.
TEST(F32_VELU__SSE2_RR2_P6_X20, batch_gt_20) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse2_rr2_p6_x20);
  for (size_t n = 21; n < 40; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}

// Batch sizes 1, 20, 39, ... up to 100, with inplace(true) set on the tester.
TEST(F32_VELU__SSE2_RR2_P6_X20, inplace) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse2_rr2_p6_x20);
  for (size_t n = 1; n <= 100; n += 19) {
    VUnOpMicrokernelTester().batch_size(n).inplace(true).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}

// Two prescale settings, each swept over batch sizes 1, 20, 39, ... up to 100.
TEST(F32_VELU__SSE2_RR2_P6_X20, prescale) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse2_rr2_p6_x20);
  const std::vector<float> prescales{0.1f, 10.0f};
  for (float value : prescales) {
    for (size_t n = 1; n <= 100; n += 19) {
      VUnOpMicrokernelTester().batch_size(n).prescale(value).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}

// Two alpha settings, each swept over batch sizes 1, 20, 39, ... up to 100.
TEST(F32_VELU__SSE2_RR2_P6_X20, alpha) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse2_rr2_p6_x20);
  const std::vector<float> alphas{0.3f, 3.0f};
  for (float value : alphas) {
    for (size_t n = 1; n <= 100; n += 19) {
      VUnOpMicrokernelTester().batch_size(n).alpha(value).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}

// Two beta settings, each swept over batch sizes 1, 20, 39, ... up to 100.
TEST(F32_VELU__SSE2_RR2_P6_X20, beta) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse2_rr2_p6_x20);
  const std::vector<float> betas{0.3f, 3.0f};
  for (float value : betas) {
    for (size_t n = 1; n <= 100; n += 19) {
      VUnOpMicrokernelTester().batch_size(n).beta(value).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
2923
2924
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
// ELU tests for xnn_f32_velu_ukernel__sse2_rr2_p6_x24. Every test starts with
// TEST_REQUIRES_X86_SSE2 and then drives the kernel through VUnOpMicrokernelTester with OpType::ELU.
// NOTE(review): the macro presumably skips the test when SSE2 is unavailable; confirm in xnnpack/isa-checks.h.

// Single run with batch_size == 24.
TEST(F32_VELU__SSE2_RR2_P6_X24, batch_eq_24) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse2_rr2_p6_x24);
  VUnOpMicrokernelTester().batch_size(24).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
}

// Batch sizes 48, 72, ... below 240, in steps of 24.
TEST(F32_VELU__SSE2_RR2_P6_X24, batch_div_24) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse2_rr2_p6_x24);
  for (size_t n = 48; n < 240; n += 24) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}

// Every batch size from 1 through 23.
TEST(F32_VELU__SSE2_RR2_P6_X24, batch_lt_24) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse2_rr2_p6_x24);
  for (size_t n = 1; n < 24; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}

// Every batch size from 25 through 47.
TEST(F32_VELU__SSE2_RR2_P6_X24, batch_gt_24) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse2_rr2_p6_x24);
  for (size_t n = 25; n < 48; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}

// Batch sizes 1, 24, 47, ... up to 120, with inplace(true) set on the tester.
TEST(F32_VELU__SSE2_RR2_P6_X24, inplace) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse2_rr2_p6_x24);
  for (size_t n = 1; n <= 120; n += 23) {
    VUnOpMicrokernelTester().batch_size(n).inplace(true).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}

// Two prescale settings, each swept over batch sizes 1, 24, 47, ... up to 120.
TEST(F32_VELU__SSE2_RR2_P6_X24, prescale) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse2_rr2_p6_x24);
  const std::vector<float> prescales{0.1f, 10.0f};
  for (float value : prescales) {
    for (size_t n = 1; n <= 120; n += 23) {
      VUnOpMicrokernelTester().batch_size(n).prescale(value).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}

// Two alpha settings, each swept over batch sizes 1, 24, 47, ... up to 120.
TEST(F32_VELU__SSE2_RR2_P6_X24, alpha) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse2_rr2_p6_x24);
  const std::vector<float> alphas{0.3f, 3.0f};
  for (float value : alphas) {
    for (size_t n = 1; n <= 120; n += 23) {
      VUnOpMicrokernelTester().batch_size(n).alpha(value).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}

// Two beta settings, each swept over batch sizes 1, 24, 47, ... up to 120.
TEST(F32_VELU__SSE2_RR2_P6_X24, beta) {
  TEST_REQUIRES_X86_SSE2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse2_rr2_p6_x24);
  const std::vector<float> betas{0.3f, 3.0f};
  for (float value : betas) {
    for (size_t n = 1; n <= 120; n += 23) {
      VUnOpMicrokernelTester().batch_size(n).beta(value).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
3006
3007
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
// ELU tests for xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x4. Every test starts with
// TEST_REQUIRES_X86_SSE41 and then drives the kernel through VUnOpMicrokernelTester with OpType::ELU.
// NOTE(review): the macro presumably skips the test when SSE4.1 is unavailable; confirm in xnnpack/isa-checks.h.

// Single run with batch_size == 4.
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X4, batch_eq_4) {
  TEST_REQUIRES_X86_SSE41;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x4);
  VUnOpMicrokernelTester().batch_size(4).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
}

// Batch sizes 8, 12, ... below 40, in steps of 4.
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X4, batch_div_4) {
  TEST_REQUIRES_X86_SSE41;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x4);
  for (size_t n = 8; n < 40; n += 4) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}

// Every batch size from 1 through 3.
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X4, batch_lt_4) {
  TEST_REQUIRES_X86_SSE41;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x4);
  for (size_t n = 1; n < 4; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}

// Every batch size from 5 through 7.
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X4, batch_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x4);
  for (size_t n = 5; n < 8; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}

// Batch sizes 1, 4, 7, ... up to 20, with inplace(true) set on the tester.
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X4, inplace) {
  TEST_REQUIRES_X86_SSE41;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x4);
  for (size_t n = 1; n <= 20; n += 3) {
    VUnOpMicrokernelTester().batch_size(n).inplace(true).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}

// Two prescale settings, each swept over batch sizes 1, 4, 7, ... up to 20.
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X4, prescale) {
  TEST_REQUIRES_X86_SSE41;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x4);
  const std::vector<float> prescales{0.1f, 10.0f};
  for (float value : prescales) {
    for (size_t n = 1; n <= 20; n += 3) {
      VUnOpMicrokernelTester().batch_size(n).prescale(value).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}

// Two alpha settings, each swept over batch sizes 1, 4, 7, ... up to 20.
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X4, alpha) {
  TEST_REQUIRES_X86_SSE41;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x4);
  const std::vector<float> alphas{0.3f, 3.0f};
  for (float value : alphas) {
    for (size_t n = 1; n <= 20; n += 3) {
      VUnOpMicrokernelTester().batch_size(n).alpha(value).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}

// Two beta settings, each swept over batch sizes 1, 4, 7, ... up to 20.
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X4, beta) {
  TEST_REQUIRES_X86_SSE41;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x4);
  const std::vector<float> betas{0.3f, 3.0f};
  for (float value : betas) {
    for (size_t n = 1; n <= 20; n += 3) {
      VUnOpMicrokernelTester().batch_size(n).beta(value).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
3089
3090
3091 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the kernel once on a batch of exactly 8 elements.
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X8, batch_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x8);
  VUnOpMicrokernelTester().batch_size(8).Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
}
3098
// Covers batch sizes 16, 24, ..., 72 (step 8).
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X8, batch_div_8) {
  TEST_REQUIRES_X86_SSE41;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x8);
  for (size_t n = 16; n < 80; n += 8) {
    VUnOpMicrokernelTester().batch_size(n).Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
3107
// Covers every batch size from 1 through 7.
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X8, batch_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x8);
  for (size_t n = 1; n < 8; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
3116
// Covers every batch size from 9 through 15.
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X8, batch_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x8);
  for (size_t n = 9; n < 16; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
3125
// Covers batch sizes 1, 8, 15, ..., 36 (step 7) with inplace(true) set on the tester.
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X8, inplace) {
  TEST_REQUIRES_X86_SSE41;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x8);
  for (size_t n = 1; n <= 40; n += 7) {
    VUnOpMicrokernelTester().batch_size(n).inplace(true).Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
3135
// For prescale values 0.1 and 10, covers batch sizes 1, 8, 15, ..., 36 (step 7).
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X8, prescale) {
  TEST_REQUIRES_X86_SSE41;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x8);
  const std::vector<float> prescales({0.1f, 10.0f});
  for (const float value : prescales) {
    for (size_t n = 1; n <= 40; n += 7) {
      VUnOpMicrokernelTester().batch_size(n).prescale(value).Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
3147
// For alpha values 0.3 and 3, covers batch sizes 1, 8, 15, ..., 36 (step 7).
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X8, alpha) {
  TEST_REQUIRES_X86_SSE41;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x8);
  const std::vector<float> alphas({0.3f, 3.0f});
  for (const float value : alphas) {
    for (size_t n = 1; n <= 40; n += 7) {
      VUnOpMicrokernelTester().batch_size(n).alpha(value).Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
3159
// For beta values 0.3 and 3, covers batch sizes 1, 8, 15, ..., 36 (step 7).
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X8, beta) {
  TEST_REQUIRES_X86_SSE41;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x8);
  const std::vector<float> betas({0.3f, 3.0f});
  for (const float value : betas) {
    for (size_t n = 1; n <= 40; n += 7) {
      VUnOpMicrokernelTester().batch_size(n).beta(value).Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
3171 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
3172
3173
3174 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the kernel once on a batch of exactly 12 elements.
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X12, batch_eq_12) {
  TEST_REQUIRES_X86_SSE41;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x12);
  VUnOpMicrokernelTester().batch_size(12).Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
}
3181
// Covers batch sizes 24, 36, ..., 108 (step 12).
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X12, batch_div_12) {
  TEST_REQUIRES_X86_SSE41;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x12);
  for (size_t n = 24; n < 120; n += 12) {
    VUnOpMicrokernelTester().batch_size(n).Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
3190
// Covers every batch size from 1 through 11.
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X12, batch_lt_12) {
  TEST_REQUIRES_X86_SSE41;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x12);
  for (size_t n = 1; n < 12; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
3199
// Covers every batch size from 13 through 23.
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X12, batch_gt_12) {
  TEST_REQUIRES_X86_SSE41;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x12);
  for (size_t n = 13; n < 24; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
3208
// Covers batch sizes 1, 12, 23, ..., 56 (step 11) with inplace(true) set on the tester.
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X12, inplace) {
  TEST_REQUIRES_X86_SSE41;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x12);
  for (size_t n = 1; n <= 60; n += 11) {
    VUnOpMicrokernelTester().batch_size(n).inplace(true).Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
3218
// For prescale values 0.1 and 10, covers batch sizes 1, 12, 23, ..., 56 (step 11).
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X12, prescale) {
  TEST_REQUIRES_X86_SSE41;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x12);
  const std::vector<float> prescales({0.1f, 10.0f});
  for (const float value : prescales) {
    for (size_t n = 1; n <= 60; n += 11) {
      VUnOpMicrokernelTester().batch_size(n).prescale(value).Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
3230
// For alpha values 0.3 and 3, covers batch sizes 1, 12, 23, ..., 56 (step 11).
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X12, alpha) {
  TEST_REQUIRES_X86_SSE41;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x12);
  const std::vector<float> alphas({0.3f, 3.0f});
  for (const float value : alphas) {
    for (size_t n = 1; n <= 60; n += 11) {
      VUnOpMicrokernelTester().batch_size(n).alpha(value).Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
3242
// For beta values 0.3 and 3, covers batch sizes 1, 12, 23, ..., 56 (step 11).
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X12, beta) {
  TEST_REQUIRES_X86_SSE41;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x12);
  const std::vector<float> betas({0.3f, 3.0f});
  for (const float value : betas) {
    for (size_t n = 1; n <= 60; n += 11) {
      VUnOpMicrokernelTester().batch_size(n).beta(value).Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
3254 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
3255
3256
3257 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the kernel once on a batch of exactly 16 elements.
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X16, batch_eq_16) {
  TEST_REQUIRES_X86_SSE41;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x16);
  VUnOpMicrokernelTester().batch_size(16).Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
}
3264
// Covers batch sizes 32, 48, ..., 144 (step 16).
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X16, batch_div_16) {
  TEST_REQUIRES_X86_SSE41;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x16);
  for (size_t n = 32; n < 160; n += 16) {
    VUnOpMicrokernelTester().batch_size(n).Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
3273
// Covers every batch size from 1 through 15.
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X16, batch_lt_16) {
  TEST_REQUIRES_X86_SSE41;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x16);
  for (size_t n = 1; n < 16; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
3282
// Covers every batch size from 17 through 31.
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X16, batch_gt_16) {
  TEST_REQUIRES_X86_SSE41;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x16);
  for (size_t n = 17; n < 32; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
3291
// Covers batch sizes 1, 16, 31, ..., 76 (step 15) with inplace(true) set on the tester.
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X16, inplace) {
  TEST_REQUIRES_X86_SSE41;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x16);
  for (size_t n = 1; n <= 80; n += 15) {
    VUnOpMicrokernelTester().batch_size(n).inplace(true).Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
3301
// For prescale values 0.1 and 10, covers batch sizes 1, 16, 31, ..., 76 (step 15).
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X16, prescale) {
  TEST_REQUIRES_X86_SSE41;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x16);
  const std::vector<float> prescales({0.1f, 10.0f});
  for (const float value : prescales) {
    for (size_t n = 1; n <= 80; n += 15) {
      VUnOpMicrokernelTester().batch_size(n).prescale(value).Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
3313
// For alpha values 0.3 and 3, covers batch sizes 1, 16, 31, ..., 76 (step 15).
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X16, alpha) {
  TEST_REQUIRES_X86_SSE41;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x16);
  const std::vector<float> alphas({0.3f, 3.0f});
  for (const float value : alphas) {
    for (size_t n = 1; n <= 80; n += 15) {
      VUnOpMicrokernelTester().batch_size(n).alpha(value).Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
3325
// For beta values 0.3 and 3, covers batch sizes 1, 16, 31, ..., 76 (step 15).
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X16, beta) {
  TEST_REQUIRES_X86_SSE41;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x16);
  const std::vector<float> betas({0.3f, 3.0f});
  for (const float value : betas) {
    for (size_t n = 1; n <= 80; n += 15) {
      VUnOpMicrokernelTester().batch_size(n).beta(value).Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
3337 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
3338
3339
3340 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the kernel once on a batch of exactly 20 elements.
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X20, batch_eq_20) {
  TEST_REQUIRES_X86_SSE41;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x20);
  VUnOpMicrokernelTester().batch_size(20).Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
}
3347
// Covers batch sizes 40, 60, ..., 180 (step 20).
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X20, batch_div_20) {
  TEST_REQUIRES_X86_SSE41;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x20);
  for (size_t n = 40; n < 200; n += 20) {
    VUnOpMicrokernelTester().batch_size(n).Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
3356
// Covers every batch size from 1 through 19.
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X20, batch_lt_20) {
  TEST_REQUIRES_X86_SSE41;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x20);
  for (size_t n = 1; n < 20; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
3365
// Covers every batch size from 21 through 39.
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X20, batch_gt_20) {
  TEST_REQUIRES_X86_SSE41;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x20);
  for (size_t n = 21; n < 40; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
3374
// Covers batch sizes 1, 20, 39, ..., 96 (step 19) with inplace(true) set on the tester.
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X20, inplace) {
  TEST_REQUIRES_X86_SSE41;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x20);
  for (size_t n = 1; n <= 100; n += 19) {
    VUnOpMicrokernelTester().batch_size(n).inplace(true).Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
3384
// For prescale values 0.1 and 10, covers batch sizes 1, 20, 39, ..., 96 (step 19).
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X20, prescale) {
  TEST_REQUIRES_X86_SSE41;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x20);
  const std::vector<float> prescales({0.1f, 10.0f});
  for (const float value : prescales) {
    for (size_t n = 1; n <= 100; n += 19) {
      VUnOpMicrokernelTester().batch_size(n).prescale(value).Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
3396
// For alpha values 0.3 and 3, covers batch sizes 1, 20, 39, ..., 96 (step 19).
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X20, alpha) {
  TEST_REQUIRES_X86_SSE41;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x20);
  const std::vector<float> alphas({0.3f, 3.0f});
  for (const float value : alphas) {
    for (size_t n = 1; n <= 100; n += 19) {
      VUnOpMicrokernelTester().batch_size(n).alpha(value).Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
3408
// For beta values 0.3 and 3, covers batch sizes 1, 20, 39, ..., 96 (step 19).
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X20, beta) {
  TEST_REQUIRES_X86_SSE41;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x20);
  const std::vector<float> betas({0.3f, 3.0f});
  for (const float value : betas) {
    for (size_t n = 1; n <= 100; n += 19) {
      VUnOpMicrokernelTester().batch_size(n).beta(value).Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
3420 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
3421
3422
3423 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the kernel once on a batch of exactly 24 elements.
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X24, batch_eq_24) {
  TEST_REQUIRES_X86_SSE41;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x24);
  VUnOpMicrokernelTester().batch_size(24).Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
}
3430
// Covers batch sizes 48, 72, ..., 216 (step 24).
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X24, batch_div_24) {
  TEST_REQUIRES_X86_SSE41;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x24);
  for (size_t n = 48; n < 240; n += 24) {
    VUnOpMicrokernelTester().batch_size(n).Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
3439
// Covers every batch size from 1 through 23.
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X24, batch_lt_24) {
  TEST_REQUIRES_X86_SSE41;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x24);
  for (size_t n = 1; n < 24; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
3448
// Covers every batch size from 25 through 47.
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X24, batch_gt_24) {
  TEST_REQUIRES_X86_SSE41;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x24);
  for (size_t n = 25; n < 48; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
3457
// Covers batch sizes 1, 24, 47, ..., 116 (step 23) with inplace(true) set on the tester.
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X24, inplace) {
  TEST_REQUIRES_X86_SSE41;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x24);
  for (size_t n = 1; n <= 120; n += 23) {
    VUnOpMicrokernelTester().batch_size(n).inplace(true).Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
3467
// For prescale values 0.1 and 10, covers batch sizes 1, 24, 47, ..., 116 (step 23).
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X24, prescale) {
  TEST_REQUIRES_X86_SSE41;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x24);
  const std::vector<float> prescales({0.1f, 10.0f});
  for (const float value : prescales) {
    for (size_t n = 1; n <= 120; n += 23) {
      VUnOpMicrokernelTester().batch_size(n).prescale(value).Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
3479
// For alpha values 0.3 and 3, covers batch sizes 1, 24, 47, ..., 116 (step 23).
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X24, alpha) {
  TEST_REQUIRES_X86_SSE41;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x24);
  const std::vector<float> alphas({0.3f, 3.0f});
  for (const float value : alphas) {
    for (size_t n = 1; n <= 120; n += 23) {
      VUnOpMicrokernelTester().batch_size(n).alpha(value).Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
3491
// For beta values 0.3 and 3, covers batch sizes 1, 24, 47, ..., 116 (step 23).
TEST(F32_VELU__SSE41_RR2_LUT16_P3_X24, beta) {
  TEST_REQUIRES_X86_SSE41;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x24);
  const std::vector<float> betas({0.3f, 3.0f});
  for (const float value : betas) {
    for (size_t n = 1; n <= 120; n += 23) {
      VUnOpMicrokernelTester().batch_size(n).beta(value).Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
3503 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
3504
3505
3506 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the kernel once on a batch of exactly 4 elements.
TEST(F32_VELU__SSE41_RR2_P6_X4, batch_eq_4) {
  TEST_REQUIRES_X86_SSE41;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse41_rr2_p6_x4);
  VUnOpMicrokernelTester().batch_size(4).Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
}
3513
// Covers batch sizes 8, 12, ..., 36 (step 4).
TEST(F32_VELU__SSE41_RR2_P6_X4, batch_div_4) {
  TEST_REQUIRES_X86_SSE41;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse41_rr2_p6_x4);
  for (size_t n = 8; n < 40; n += 4) {
    VUnOpMicrokernelTester().batch_size(n).Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
3522
// Covers every batch size from 1 through 3.
TEST(F32_VELU__SSE41_RR2_P6_X4, batch_lt_4) {
  TEST_REQUIRES_X86_SSE41;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse41_rr2_p6_x4);
  for (size_t n = 1; n < 4; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
3531
// Covers every batch size from 5 through 7.
TEST(F32_VELU__SSE41_RR2_P6_X4, batch_gt_4) {
  TEST_REQUIRES_X86_SSE41;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse41_rr2_p6_x4);
  for (size_t n = 5; n < 8; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
3540
// Covers batch sizes 1, 4, 7, ..., 19 (step 3) with inplace(true) set on the tester.
TEST(F32_VELU__SSE41_RR2_P6_X4, inplace) {
  TEST_REQUIRES_X86_SSE41;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse41_rr2_p6_x4);
  for (size_t n = 1; n <= 20; n += 3) {
    VUnOpMicrokernelTester().batch_size(n).inplace(true).Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
3550
// For prescale values 0.1 and 10, covers batch sizes 1, 4, 7, ..., 19 (step 3).
TEST(F32_VELU__SSE41_RR2_P6_X4, prescale) {
  TEST_REQUIRES_X86_SSE41;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse41_rr2_p6_x4);
  const std::vector<float> prescales({0.1f, 10.0f});
  for (const float value : prescales) {
    for (size_t n = 1; n <= 20; n += 3) {
      VUnOpMicrokernelTester().batch_size(n).prescale(value).Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
3562
// For alpha values 0.3 and 3, covers batch sizes 1, 4, 7, ..., 19 (step 3).
TEST(F32_VELU__SSE41_RR2_P6_X4, alpha) {
  TEST_REQUIRES_X86_SSE41;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse41_rr2_p6_x4);
  const std::vector<float> alphas({0.3f, 3.0f});
  for (const float value : alphas) {
    for (size_t n = 1; n <= 20; n += 3) {
      VUnOpMicrokernelTester().batch_size(n).alpha(value).Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
3574
// For beta values 0.3 and 3, covers batch sizes 1, 4, 7, ..., 19 (step 3).
TEST(F32_VELU__SSE41_RR2_P6_X4, beta) {
  TEST_REQUIRES_X86_SSE41;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse41_rr2_p6_x4);
  const std::vector<float> betas({0.3f, 3.0f});
  for (const float value : betas) {
    for (size_t n = 1; n <= 20; n += 3) {
      VUnOpMicrokernelTester().batch_size(n).beta(value).Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
3586 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
3587
3588
3589 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the kernel once on a batch of exactly 8 elements.
TEST(F32_VELU__SSE41_RR2_P6_X8, batch_eq_8) {
  TEST_REQUIRES_X86_SSE41;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse41_rr2_p6_x8);
  VUnOpMicrokernelTester().batch_size(8).Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
}
3596
// Covers batch sizes 16, 24, ..., 72 (step 8).
TEST(F32_VELU__SSE41_RR2_P6_X8, batch_div_8) {
  TEST_REQUIRES_X86_SSE41;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse41_rr2_p6_x8);
  for (size_t n = 16; n < 80; n += 8) {
    VUnOpMicrokernelTester().batch_size(n).Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
3605
// Covers every batch size from 1 through 7.
TEST(F32_VELU__SSE41_RR2_P6_X8, batch_lt_8) {
  TEST_REQUIRES_X86_SSE41;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse41_rr2_p6_x8);
  for (size_t n = 1; n < 8; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
3614
// Covers every batch size from 9 through 15.
TEST(F32_VELU__SSE41_RR2_P6_X8, batch_gt_8) {
  TEST_REQUIRES_X86_SSE41;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse41_rr2_p6_x8);
  for (size_t n = 9; n < 16; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
3623
// Covers batch sizes 1, 8, 15, ..., 36 (step 7) with inplace(true) set on the tester.
TEST(F32_VELU__SSE41_RR2_P6_X8, inplace) {
  TEST_REQUIRES_X86_SSE41;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse41_rr2_p6_x8);
  for (size_t n = 1; n <= 40; n += 7) {
    VUnOpMicrokernelTester().batch_size(n).inplace(true).Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
3633
// For prescale values 0.1 and 10, covers batch sizes 1, 8, 15, ..., 36 (step 7).
TEST(F32_VELU__SSE41_RR2_P6_X8, prescale) {
  TEST_REQUIRES_X86_SSE41;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse41_rr2_p6_x8);
  const std::vector<float> prescales({0.1f, 10.0f});
  for (const float value : prescales) {
    for (size_t n = 1; n <= 40; n += 7) {
      VUnOpMicrokernelTester().batch_size(n).prescale(value).Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
3645
// For alpha values 0.3 and 3, covers batch sizes 1, 8, 15, ..., 36 (step 7).
TEST(F32_VELU__SSE41_RR2_P6_X8, alpha) {
  TEST_REQUIRES_X86_SSE41;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse41_rr2_p6_x8);
  const std::vector<float> alphas({0.3f, 3.0f});
  for (const float value : alphas) {
    for (size_t n = 1; n <= 40; n += 7) {
      VUnOpMicrokernelTester().batch_size(n).alpha(value).Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
3657
// For beta values 0.3 and 3, covers batch sizes 1, 8, 15, ..., 36 (step 7).
TEST(F32_VELU__SSE41_RR2_P6_X8, beta) {
  TEST_REQUIRES_X86_SSE41;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse41_rr2_p6_x8);
  const std::vector<float> betas({0.3f, 3.0f});
  for (const float value : betas) {
    for (size_t n = 1; n <= 40; n += 7) {
      VUnOpMicrokernelTester().batch_size(n).beta(value).Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
3669 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
3670
3671
3672 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the kernel once on a batch of exactly 12 elements.
TEST(F32_VELU__SSE41_RR2_P6_X12, batch_eq_12) {
  TEST_REQUIRES_X86_SSE41;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse41_rr2_p6_x12);
  VUnOpMicrokernelTester().batch_size(12).Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
}
3679
// Covers batch sizes 24, 36, ..., 108 (step 12).
TEST(F32_VELU__SSE41_RR2_P6_X12, batch_div_12) {
  TEST_REQUIRES_X86_SSE41;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse41_rr2_p6_x12);
  for (size_t n = 24; n < 120; n += 12) {
    VUnOpMicrokernelTester().batch_size(n).Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
3688
// Covers every batch size from 1 through 11.
TEST(F32_VELU__SSE41_RR2_P6_X12, batch_lt_12) {
  TEST_REQUIRES_X86_SSE41;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse41_rr2_p6_x12);
  for (size_t n = 1; n < 12; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
3697
// Covers every batch size from 13 through 23.
TEST(F32_VELU__SSE41_RR2_P6_X12, batch_gt_12) {
  TEST_REQUIRES_X86_SSE41;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse41_rr2_p6_x12);
  for (size_t n = 13; n < 24; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
3706
// Covers batch sizes 1, 12, 23, ..., 56 (step 11) with inplace(true) set on the tester.
TEST(F32_VELU__SSE41_RR2_P6_X12, inplace) {
  TEST_REQUIRES_X86_SSE41;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse41_rr2_p6_x12);
  for (size_t n = 1; n <= 60; n += 11) {
    VUnOpMicrokernelTester().batch_size(n).inplace(true).Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
3716
// For prescale values 0.1 and 10, covers batch sizes 1, 12, 23, ..., 56 (step 11).
TEST(F32_VELU__SSE41_RR2_P6_X12, prescale) {
  TEST_REQUIRES_X86_SSE41;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse41_rr2_p6_x12);
  const std::vector<float> prescales({0.1f, 10.0f});
  for (const float value : prescales) {
    for (size_t n = 1; n <= 60; n += 11) {
      VUnOpMicrokernelTester().batch_size(n).prescale(value).Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
3728
// For alpha values 0.3 and 3, covers batch sizes 1, 12, 23, ..., 56 (step 11).
TEST(F32_VELU__SSE41_RR2_P6_X12, alpha) {
  TEST_REQUIRES_X86_SSE41;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse41_rr2_p6_x12);
  const std::vector<float> alphas({0.3f, 3.0f});
  for (const float value : alphas) {
    for (size_t n = 1; n <= 60; n += 11) {
      VUnOpMicrokernelTester().batch_size(n).alpha(value).Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
3740
// For beta values 0.3 and 3, covers batch sizes 1, 12, 23, ..., 56 (step 11).
TEST(F32_VELU__SSE41_RR2_P6_X12, beta) {
  TEST_REQUIRES_X86_SSE41;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse41_rr2_p6_x12);
  const std::vector<float> betas({0.3f, 3.0f});
  for (const float value : betas) {
    for (size_t n = 1; n <= 60; n += 11) {
      VUnOpMicrokernelTester().batch_size(n).beta(value).Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
3752 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
3753
3754
3755 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the kernel once on a batch of exactly 16 elements.
TEST(F32_VELU__SSE41_RR2_P6_X16, batch_eq_16) {
  TEST_REQUIRES_X86_SSE41;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse41_rr2_p6_x16);
  VUnOpMicrokernelTester().batch_size(16).Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
}
3762
// Covers batch sizes 32, 48, ..., 144 (step 16).
TEST(F32_VELU__SSE41_RR2_P6_X16, batch_div_16) {
  TEST_REQUIRES_X86_SSE41;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse41_rr2_p6_x16);
  for (size_t n = 32; n < 160; n += 16) {
    VUnOpMicrokernelTester().batch_size(n).Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
3771
// Covers every batch size from 1 through 15.
TEST(F32_VELU__SSE41_RR2_P6_X16, batch_lt_16) {
  TEST_REQUIRES_X86_SSE41;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse41_rr2_p6_x16);
  for (size_t n = 1; n < 16; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
3780
// Covers every batch size from 17 through 31.
TEST(F32_VELU__SSE41_RR2_P6_X16, batch_gt_16) {
  TEST_REQUIRES_X86_SSE41;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse41_rr2_p6_x16);
  for (size_t n = 17; n < 32; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
3789
// Covers batch sizes 1, 16, 31, ..., 76 (step 15) with inplace(true) set on the tester.
TEST(F32_VELU__SSE41_RR2_P6_X16, inplace) {
  TEST_REQUIRES_X86_SSE41;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse41_rr2_p6_x16);
  for (size_t n = 1; n <= 80; n += 15) {
    VUnOpMicrokernelTester().batch_size(n).inplace(true).Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
3799
// For prescale values 0.1 and 10, covers batch sizes 1, 16, 31, ..., 76 (step 15).
TEST(F32_VELU__SSE41_RR2_P6_X16, prescale) {
  TEST_REQUIRES_X86_SSE41;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse41_rr2_p6_x16);
  const std::vector<float> prescales({0.1f, 10.0f});
  for (const float value : prescales) {
    for (size_t n = 1; n <= 80; n += 15) {
      VUnOpMicrokernelTester().batch_size(n).prescale(value).Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
3811
// For alpha values 0.3 and 3, covers batch sizes 1, 16, 31, ..., 76 (step 15).
TEST(F32_VELU__SSE41_RR2_P6_X16, alpha) {
  TEST_REQUIRES_X86_SSE41;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse41_rr2_p6_x16);
  const std::vector<float> alphas({0.3f, 3.0f});
  for (const float value : alphas) {
    for (size_t n = 1; n <= 80; n += 15) {
      VUnOpMicrokernelTester().batch_size(n).alpha(value).Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
3823
// For beta values 0.3 and 3, covers batch sizes 1, 16, 31, ..., 76 (step 15).
TEST(F32_VELU__SSE41_RR2_P6_X16, beta) {
  TEST_REQUIRES_X86_SSE41;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse41_rr2_p6_x16);
  const std::vector<float> betas({0.3f, 3.0f});
  for (const float value : betas) {
    for (size_t n = 1; n <= 80; n += 15) {
      VUnOpMicrokernelTester().batch_size(n).beta(value).Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
3835 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
3836
3837
3838 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the kernel once on a batch of exactly 20 elements.
TEST(F32_VELU__SSE41_RR2_P6_X20, batch_eq_20) {
  TEST_REQUIRES_X86_SSE41;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse41_rr2_p6_x20);
  VUnOpMicrokernelTester().batch_size(20).Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
}
3845
// ELU tester runs at batch sizes 40, 60, ..., 180 (multiples of 20).
TEST(F32_VELU__SSE41_RR2_P6_X20, batch_div_20) {
  TEST_REQUIRES_X86_SSE41;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse41_rr2_p6_x20);
  const auto op = VUnOpMicrokernelTester::OpType::ELU;
  for (size_t n = 40; n < 200; n += 20) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, op);
  }
}
3854
// ELU tester runs at every batch size from 1 through 19.
TEST(F32_VELU__SSE41_RR2_P6_X20, batch_lt_20) {
  TEST_REQUIRES_X86_SSE41;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse41_rr2_p6_x20);
  const auto op = VUnOpMicrokernelTester::OpType::ELU;
  for (size_t n = 1; n < 20; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, op);
  }
}
3863
// ELU tester runs at every batch size from 21 through 39.
TEST(F32_VELU__SSE41_RR2_P6_X20, batch_gt_20) {
  TEST_REQUIRES_X86_SSE41;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse41_rr2_p6_x20);
  const auto op = VUnOpMicrokernelTester::OpType::ELU;
  for (size_t n = 21; n < 40; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, op);
  }
}
3872
// ELU tester runs with inplace(true) at batch sizes 1, 20, 39, 58, 77, 96.
TEST(F32_VELU__SSE41_RR2_P6_X20, inplace) {
  TEST_REQUIRES_X86_SSE41;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse41_rr2_p6_x20);
  const auto op = VUnOpMicrokernelTester::OpType::ELU;
  for (size_t n = 1; n <= 100; n += 19) {
    VUnOpMicrokernelTester().batch_size(n).inplace(true).Test(ukernel, op);
  }
}
3882
// ELU tester runs with prescale 0.1 and 10.0, each at batch sizes 1, 20, 39, 58, 77, 96.
TEST(F32_VELU__SSE41_RR2_P6_X20, prescale) {
  TEST_REQUIRES_X86_SSE41;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse41_rr2_p6_x20);
  const auto op = VUnOpMicrokernelTester::OpType::ELU;
  const std::vector<float> prescales{0.1f, 10.0f};
  for (const float scale : prescales) {
    for (size_t n = 1; n <= 100; n += 19) {
      VUnOpMicrokernelTester().batch_size(n).prescale(scale).Test(ukernel, op);
    }
  }
}
3894
// ELU tester runs with alpha 0.3 and 3.0, each at batch sizes 1, 20, 39, 58, 77, 96.
TEST(F32_VELU__SSE41_RR2_P6_X20, alpha) {
  TEST_REQUIRES_X86_SSE41;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse41_rr2_p6_x20);
  const auto op = VUnOpMicrokernelTester::OpType::ELU;
  const std::vector<float> alphas{0.3f, 3.0f};
  for (const float a : alphas) {
    for (size_t n = 1; n <= 100; n += 19) {
      VUnOpMicrokernelTester().batch_size(n).alpha(a).Test(ukernel, op);
    }
  }
}
3906
// ELU tester runs with beta 0.3 and 3.0, each at batch sizes 1, 20, 39, 58, 77, 96.
TEST(F32_VELU__SSE41_RR2_P6_X20, beta) {
  TEST_REQUIRES_X86_SSE41;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse41_rr2_p6_x20);
  const auto op = VUnOpMicrokernelTester::OpType::ELU;
  const std::vector<float> betas{0.3f, 3.0f};
  for (const float b : betas) {
    for (size_t n = 1; n <= 100; n += 19) {
      VUnOpMicrokernelTester().batch_size(n).beta(b).Test(ukernel, op);
    }
  }
}
3918 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
3919
3920
3921 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single ELU tester run at batch size 24.
TEST(F32_VELU__SSE41_RR2_P6_X24, batch_eq_24) {
  TEST_REQUIRES_X86_SSE41;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse41_rr2_p6_x24);
  const auto op = VUnOpMicrokernelTester::OpType::ELU;
  VUnOpMicrokernelTester().batch_size(24).Test(ukernel, op);
}
3928
// ELU tester runs at batch sizes 48, 72, ..., 216 (multiples of 24).
TEST(F32_VELU__SSE41_RR2_P6_X24, batch_div_24) {
  TEST_REQUIRES_X86_SSE41;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse41_rr2_p6_x24);
  const auto op = VUnOpMicrokernelTester::OpType::ELU;
  for (size_t n = 48; n < 240; n += 24) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, op);
  }
}
3937
// ELU tester runs at every batch size from 1 through 23.
TEST(F32_VELU__SSE41_RR2_P6_X24, batch_lt_24) {
  TEST_REQUIRES_X86_SSE41;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse41_rr2_p6_x24);
  const auto op = VUnOpMicrokernelTester::OpType::ELU;
  for (size_t n = 1; n < 24; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, op);
  }
}
3946
// ELU tester runs at every batch size from 25 through 47.
TEST(F32_VELU__SSE41_RR2_P6_X24, batch_gt_24) {
  TEST_REQUIRES_X86_SSE41;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse41_rr2_p6_x24);
  const auto op = VUnOpMicrokernelTester::OpType::ELU;
  for (size_t n = 25; n < 48; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, op);
  }
}
3955
// ELU tester runs with inplace(true) at batch sizes 1, 24, 47, 70, 93, 116.
TEST(F32_VELU__SSE41_RR2_P6_X24, inplace) {
  TEST_REQUIRES_X86_SSE41;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse41_rr2_p6_x24);
  const auto op = VUnOpMicrokernelTester::OpType::ELU;
  for (size_t n = 1; n <= 120; n += 23) {
    VUnOpMicrokernelTester().batch_size(n).inplace(true).Test(ukernel, op);
  }
}
3965
// ELU tester runs with prescale 0.1 and 10.0, each at batch sizes 1, 24, 47, 70, 93, 116.
TEST(F32_VELU__SSE41_RR2_P6_X24, prescale) {
  TEST_REQUIRES_X86_SSE41;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse41_rr2_p6_x24);
  const auto op = VUnOpMicrokernelTester::OpType::ELU;
  const std::vector<float> prescales{0.1f, 10.0f};
  for (const float scale : prescales) {
    for (size_t n = 1; n <= 120; n += 23) {
      VUnOpMicrokernelTester().batch_size(n).prescale(scale).Test(ukernel, op);
    }
  }
}
3977
// ELU tester runs with alpha 0.3 and 3.0, each at batch sizes 1, 24, 47, 70, 93, 116.
TEST(F32_VELU__SSE41_RR2_P6_X24, alpha) {
  TEST_REQUIRES_X86_SSE41;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse41_rr2_p6_x24);
  const auto op = VUnOpMicrokernelTester::OpType::ELU;
  const std::vector<float> alphas{0.3f, 3.0f};
  for (const float a : alphas) {
    for (size_t n = 1; n <= 120; n += 23) {
      VUnOpMicrokernelTester().batch_size(n).alpha(a).Test(ukernel, op);
    }
  }
}
3989
// ELU tester runs with beta 0.3 and 3.0, each at batch sizes 1, 24, 47, 70, 93, 116.
TEST(F32_VELU__SSE41_RR2_P6_X24, beta) {
  TEST_REQUIRES_X86_SSE41;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__sse41_rr2_p6_x24);
  const auto op = VUnOpMicrokernelTester::OpType::ELU;
  const std::vector<float> betas{0.3f, 3.0f};
  for (const float b : betas) {
    for (size_t n = 1; n <= 120; n += 23) {
      VUnOpMicrokernelTester().batch_size(n).beta(b).Test(ukernel, op);
    }
  }
}
4001 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
4002
4003
4004 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single ELU tester run at batch size 8.
TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X8, batch_eq_8) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x8);
  const auto op = VUnOpMicrokernelTester::OpType::ELU;
  VUnOpMicrokernelTester().batch_size(8).Test(ukernel, op);
}
4011
// ELU tester runs at batch sizes 16, 24, ..., 72 (multiples of 8).
TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X8, batch_div_8) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x8);
  const auto op = VUnOpMicrokernelTester::OpType::ELU;
  for (size_t n = 16; n < 80; n += 8) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, op);
  }
}
4020
// ELU tester runs at every batch size from 1 through 7.
TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X8, batch_lt_8) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x8);
  const auto op = VUnOpMicrokernelTester::OpType::ELU;
  for (size_t n = 1; n < 8; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, op);
  }
}
4029
// ELU tester runs at every batch size from 9 through 15.
TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X8, batch_gt_8) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x8);
  const auto op = VUnOpMicrokernelTester::OpType::ELU;
  for (size_t n = 9; n < 16; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, op);
  }
}
4038
// ELU tester runs with inplace(true) at batch sizes 1, 8, 15, 22, 29, 36.
TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X8, inplace) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x8);
  const auto op = VUnOpMicrokernelTester::OpType::ELU;
  for (size_t n = 1; n <= 40; n += 7) {
    VUnOpMicrokernelTester().batch_size(n).inplace(true).Test(ukernel, op);
  }
}
4048
// ELU tester runs with prescale 0.1 and 10.0, each at batch sizes 1, 8, 15, 22, 29, 36.
TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X8, prescale) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x8);
  const auto op = VUnOpMicrokernelTester::OpType::ELU;
  const std::vector<float> prescales{0.1f, 10.0f};
  for (const float scale : prescales) {
    for (size_t n = 1; n <= 40; n += 7) {
      VUnOpMicrokernelTester().batch_size(n).prescale(scale).Test(ukernel, op);
    }
  }
}
4060
// ELU tester runs with alpha 0.3 and 3.0, each at batch sizes 1, 8, 15, 22, 29, 36.
TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X8, alpha) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x8);
  const auto op = VUnOpMicrokernelTester::OpType::ELU;
  const std::vector<float> alphas{0.3f, 3.0f};
  for (const float a : alphas) {
    for (size_t n = 1; n <= 40; n += 7) {
      VUnOpMicrokernelTester().batch_size(n).alpha(a).Test(ukernel, op);
    }
  }
}
4072
// ELU tester runs with beta 0.3 and 3.0, each at batch sizes 1, 8, 15, 22, 29, 36.
TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X8, beta) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x8);
  const auto op = VUnOpMicrokernelTester::OpType::ELU;
  const std::vector<float> betas{0.3f, 3.0f};
  for (const float b : betas) {
    for (size_t n = 1; n <= 40; n += 7) {
      VUnOpMicrokernelTester().batch_size(n).beta(b).Test(ukernel, op);
    }
  }
}
4084 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
4085
4086
4087 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single ELU tester run at batch size 16.
TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X16, batch_eq_16) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x16);
  const auto op = VUnOpMicrokernelTester::OpType::ELU;
  VUnOpMicrokernelTester().batch_size(16).Test(ukernel, op);
}
4094
// ELU tester runs at batch sizes 32, 48, ..., 144 (multiples of 16).
TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X16, batch_div_16) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x16);
  const auto op = VUnOpMicrokernelTester::OpType::ELU;
  for (size_t n = 32; n < 160; n += 16) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, op);
  }
}
4103
// ELU tester runs at every batch size from 1 through 15.
TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X16, batch_lt_16) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x16);
  const auto op = VUnOpMicrokernelTester::OpType::ELU;
  for (size_t n = 1; n < 16; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, op);
  }
}
4112
// ELU tester runs at every batch size from 17 through 31.
TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X16, batch_gt_16) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x16);
  const auto op = VUnOpMicrokernelTester::OpType::ELU;
  for (size_t n = 17; n < 32; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, op);
  }
}
4121
// ELU tester runs with inplace(true) at batch sizes 1, 16, 31, 46, 61, 76.
TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X16, inplace) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x16);
  const auto op = VUnOpMicrokernelTester::OpType::ELU;
  for (size_t n = 1; n <= 80; n += 15) {
    VUnOpMicrokernelTester().batch_size(n).inplace(true).Test(ukernel, op);
  }
}
4131
// ELU tester runs with prescale 0.1 and 10.0, each at batch sizes 1, 16, 31, 46, 61, 76.
TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X16, prescale) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x16);
  const auto op = VUnOpMicrokernelTester::OpType::ELU;
  const std::vector<float> prescales{0.1f, 10.0f};
  for (const float scale : prescales) {
    for (size_t n = 1; n <= 80; n += 15) {
      VUnOpMicrokernelTester().batch_size(n).prescale(scale).Test(ukernel, op);
    }
  }
}
4143
// ELU tester runs with alpha 0.3 and 3.0, each at batch sizes 1, 16, 31, 46, 61, 76.
TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X16, alpha) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x16);
  const auto op = VUnOpMicrokernelTester::OpType::ELU;
  const std::vector<float> alphas{0.3f, 3.0f};
  for (const float a : alphas) {
    for (size_t n = 1; n <= 80; n += 15) {
      VUnOpMicrokernelTester().batch_size(n).alpha(a).Test(ukernel, op);
    }
  }
}
4155
// ELU tester runs with beta 0.3 and 3.0, each at batch sizes 1, 16, 31, 46, 61, 76.
TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X16, beta) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x16);
  const auto op = VUnOpMicrokernelTester::OpType::ELU;
  const std::vector<float> betas{0.3f, 3.0f};
  for (const float b : betas) {
    for (size_t n = 1; n <= 80; n += 15) {
      VUnOpMicrokernelTester().batch_size(n).beta(b).Test(ukernel, op);
    }
  }
}
4167 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
4168
4169
4170 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single ELU tester run at batch size 24.
TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X24, batch_eq_24) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x24);
  const auto op = VUnOpMicrokernelTester::OpType::ELU;
  VUnOpMicrokernelTester().batch_size(24).Test(ukernel, op);
}
4177
// ELU tester runs at batch sizes 48, 72, ..., 216 (multiples of 24).
TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X24, batch_div_24) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x24);
  const auto op = VUnOpMicrokernelTester::OpType::ELU;
  for (size_t n = 48; n < 240; n += 24) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, op);
  }
}
4186
// ELU tester runs at every batch size from 1 through 23.
TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X24, batch_lt_24) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x24);
  const auto op = VUnOpMicrokernelTester::OpType::ELU;
  for (size_t n = 1; n < 24; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, op);
  }
}
4195
// ELU tester runs at every batch size from 25 through 47.
TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X24, batch_gt_24) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x24);
  const auto op = VUnOpMicrokernelTester::OpType::ELU;
  for (size_t n = 25; n < 48; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, op);
  }
}
4204
// ELU tester runs with inplace(true) at batch sizes 1, 24, 47, 70, 93, 116.
TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X24, inplace) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x24);
  const auto op = VUnOpMicrokernelTester::OpType::ELU;
  for (size_t n = 1; n <= 120; n += 23) {
    VUnOpMicrokernelTester().batch_size(n).inplace(true).Test(ukernel, op);
  }
}
4214
// ELU tester runs with prescale 0.1 and 10.0, each at batch sizes 1, 24, 47, 70, 93, 116.
TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X24, prescale) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x24);
  const auto op = VUnOpMicrokernelTester::OpType::ELU;
  const std::vector<float> prescales{0.1f, 10.0f};
  for (const float scale : prescales) {
    for (size_t n = 1; n <= 120; n += 23) {
      VUnOpMicrokernelTester().batch_size(n).prescale(scale).Test(ukernel, op);
    }
  }
}
4226
// ELU tester runs with alpha 0.3 and 3.0, each at batch sizes 1, 24, 47, 70, 93, 116.
TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X24, alpha) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x24);
  const auto op = VUnOpMicrokernelTester::OpType::ELU;
  const std::vector<float> alphas{0.3f, 3.0f};
  for (const float a : alphas) {
    for (size_t n = 1; n <= 120; n += 23) {
      VUnOpMicrokernelTester().batch_size(n).alpha(a).Test(ukernel, op);
    }
  }
}
4238
// ELU tester runs with beta 0.3 and 3.0, each at batch sizes 1, 24, 47, 70, 93, 116.
TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X24, beta) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x24);
  const auto op = VUnOpMicrokernelTester::OpType::ELU;
  const std::vector<float> betas{0.3f, 3.0f};
  for (const float b : betas) {
    for (size_t n = 1; n <= 120; n += 23) {
      VUnOpMicrokernelTester().batch_size(n).beta(b).Test(ukernel, op);
    }
  }
}
4250 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
4251
4252
4253 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single ELU tester run at batch size 32.
TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X32, batch_eq_32) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x32);
  const auto op = VUnOpMicrokernelTester::OpType::ELU;
  VUnOpMicrokernelTester().batch_size(32).Test(ukernel, op);
}
4260
// ELU tester runs at batch sizes 64, 96, ..., 288 (multiples of 32).
TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X32, batch_div_32) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x32);
  const auto op = VUnOpMicrokernelTester::OpType::ELU;
  for (size_t n = 64; n < 320; n += 32) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, op);
  }
}
4269
// ELU tester runs at every batch size from 1 through 31.
TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X32, batch_lt_32) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x32);
  const auto op = VUnOpMicrokernelTester::OpType::ELU;
  for (size_t n = 1; n < 32; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, op);
  }
}
4278
// ELU tester runs at every batch size from 33 through 63.
TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X32, batch_gt_32) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x32);
  const auto op = VUnOpMicrokernelTester::OpType::ELU;
  for (size_t n = 33; n < 64; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, op);
  }
}
4287
// ELU tester runs with inplace(true) at batch sizes 1, 32, 63, 94, 125, 156.
TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X32, inplace) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x32);
  const auto op = VUnOpMicrokernelTester::OpType::ELU;
  for (size_t n = 1; n <= 160; n += 31) {
    VUnOpMicrokernelTester().batch_size(n).inplace(true).Test(ukernel, op);
  }
}
4297
// ELU tester runs with prescale 0.1 and 10.0, each at batch sizes 1, 32, 63, 94, 125, 156.
TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X32, prescale) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x32);
  const auto op = VUnOpMicrokernelTester::OpType::ELU;
  const std::vector<float> prescales{0.1f, 10.0f};
  for (const float scale : prescales) {
    for (size_t n = 1; n <= 160; n += 31) {
      VUnOpMicrokernelTester().batch_size(n).prescale(scale).Test(ukernel, op);
    }
  }
}
4309
// ELU tester runs with alpha 0.3 and 3.0, each at batch sizes 1, 32, 63, 94, 125, 156.
TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X32, alpha) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x32);
  const auto op = VUnOpMicrokernelTester::OpType::ELU;
  const std::vector<float> alphas{0.3f, 3.0f};
  for (const float a : alphas) {
    for (size_t n = 1; n <= 160; n += 31) {
      VUnOpMicrokernelTester().batch_size(n).alpha(a).Test(ukernel, op);
    }
  }
}
4321
// ELU tester runs with beta 0.3 and 3.0, each at batch sizes 1, 32, 63, 94, 125, 156.
TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X32, beta) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x32);
  const auto op = VUnOpMicrokernelTester::OpType::ELU;
  const std::vector<float> betas{0.3f, 3.0f};
  for (const float b : betas) {
    for (size_t n = 1; n <= 160; n += 31) {
      VUnOpMicrokernelTester().batch_size(n).beta(b).Test(ukernel, op);
    }
  }
}
4333 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
4334
4335
4336 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single ELU tester run at batch size 40.
TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X40, batch_eq_40) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x40);
  const auto op = VUnOpMicrokernelTester::OpType::ELU;
  VUnOpMicrokernelTester().batch_size(40).Test(ukernel, op);
}
4343
// ELU tester runs at batch sizes 80, 120, ..., 360 (multiples of 40).
TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X40, batch_div_40) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x40);
  const auto op = VUnOpMicrokernelTester::OpType::ELU;
  for (size_t n = 80; n < 400; n += 40) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, op);
  }
}
4352
// ELU tester runs at every batch size from 1 through 39.
TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X40, batch_lt_40) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x40);
  const auto op = VUnOpMicrokernelTester::OpType::ELU;
  for (size_t n = 1; n < 40; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, op);
  }
}
4361
// ELU tester runs at every batch size from 41 through 79.
TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X40, batch_gt_40) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x40);
  const auto op = VUnOpMicrokernelTester::OpType::ELU;
  for (size_t n = 41; n < 80; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, op);
  }
}
4370
// ELU tester runs with inplace(true) at batch sizes 1, 40, 79, 118, 157, 196.
TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X40, inplace) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x40);
  const auto op = VUnOpMicrokernelTester::OpType::ELU;
  for (size_t n = 1; n <= 200; n += 39) {
    VUnOpMicrokernelTester().batch_size(n).inplace(true).Test(ukernel, op);
  }
}
4380
// ELU tester runs with prescale 0.1 and 10.0, each at batch sizes 1, 40, 79, 118, 157, 196.
TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X40, prescale) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x40);
  const auto op = VUnOpMicrokernelTester::OpType::ELU;
  const std::vector<float> prescales{0.1f, 10.0f};
  for (const float scale : prescales) {
    for (size_t n = 1; n <= 200; n += 39) {
      VUnOpMicrokernelTester().batch_size(n).prescale(scale).Test(ukernel, op);
    }
  }
}
4392
// ELU tester runs with alpha 0.3 and 3.0, each at batch sizes 1, 40, 79, 118, 157, 196.
TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X40, alpha) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x40);
  const auto op = VUnOpMicrokernelTester::OpType::ELU;
  const std::vector<float> alphas{0.3f, 3.0f};
  for (const float a : alphas) {
    for (size_t n = 1; n <= 200; n += 39) {
      VUnOpMicrokernelTester().batch_size(n).alpha(a).Test(ukernel, op);
    }
  }
}
4404
// ELU tester runs with beta 0.3 and 3.0, each at batch sizes 1, 40, 79, 118, 157, 196.
TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X40, beta) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x40);
  const auto op = VUnOpMicrokernelTester::OpType::ELU;
  const std::vector<float> betas{0.3f, 3.0f};
  for (const float b : betas) {
    for (size_t n = 1; n <= 200; n += 39) {
      VUnOpMicrokernelTester().batch_size(n).beta(b).Test(ukernel, op);
    }
  }
}
4416 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
4417
4418
4419 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single ELU tester run at batch size 48.
TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X48, batch_eq_48) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x48);
  const auto op = VUnOpMicrokernelTester::OpType::ELU;
  VUnOpMicrokernelTester().batch_size(48).Test(ukernel, op);
}
4426
// ELU tester runs at batch sizes 96, 144, ..., 432 (multiples of 48).
TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X48, batch_div_48) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x48);
  const auto op = VUnOpMicrokernelTester::OpType::ELU;
  for (size_t n = 96; n < 480; n += 48) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, op);
  }
}
4435
// ELU tester runs at every batch size from 1 through 47.
TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X48, batch_lt_48) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x48);
  const auto op = VUnOpMicrokernelTester::OpType::ELU;
  for (size_t n = 1; n < 48; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, op);
  }
}
4444
// ELU tester runs at every batch size from 49 through 95.
TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X48, batch_gt_48) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x48);
  const auto op = VUnOpMicrokernelTester::OpType::ELU;
  for (size_t n = 49; n < 96; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, op);
  }
}
4453
// ELU tester runs with inplace(true) at batch sizes 1, 48, 95, 142, 189, 236.
TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X48, inplace) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x48);
  const auto op = VUnOpMicrokernelTester::OpType::ELU;
  for (size_t n = 1; n <= 240; n += 47) {
    VUnOpMicrokernelTester().batch_size(n).inplace(true).Test(ukernel, op);
  }
}
4463
// ELU tester runs with prescale 0.1 and 10.0, each at batch sizes 1, 48, 95, 142, 189, 236.
TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X48, prescale) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x48);
  const auto op = VUnOpMicrokernelTester::OpType::ELU;
  const std::vector<float> prescales{0.1f, 10.0f};
  for (const float scale : prescales) {
    for (size_t n = 1; n <= 240; n += 47) {
      VUnOpMicrokernelTester().batch_size(n).prescale(scale).Test(ukernel, op);
    }
  }
}
4475
// ELU tester runs with alpha 0.3 and 3.0, each at batch sizes 1, 48, 95, 142, 189, 236.
TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X48, alpha) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x48);
  const auto op = VUnOpMicrokernelTester::OpType::ELU;
  const std::vector<float> alphas{0.3f, 3.0f};
  for (const float a : alphas) {
    for (size_t n = 1; n <= 240; n += 47) {
      VUnOpMicrokernelTester().batch_size(n).alpha(a).Test(ukernel, op);
    }
  }
}
4487
// ELU tester runs with beta 0.3 and 3.0, each at batch sizes 1, 48, 95, 142, 189, 236.
TEST(F32_VELU__AVX_RR2_LUT4_P4_PERM_X48, beta) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x48);
  const auto op = VUnOpMicrokernelTester::OpType::ELU;
  const std::vector<float> betas{0.3f, 3.0f};
  for (const float b : betas) {
    for (size_t n = 1; n <= 240; n += 47) {
      VUnOpMicrokernelTester().batch_size(n).beta(b).Test(ukernel, op);
    }
  }
}
4499 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
4500
4501
4502 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single ELU tester run at batch size 8.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X8, batch_eq_8) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x8);
  const auto op = VUnOpMicrokernelTester::OpType::ELU;
  VUnOpMicrokernelTester().batch_size(8).Test(ukernel, op);
}
4509
// ELU tester runs at batch sizes 16, 24, ..., 72 (multiples of 8).
TEST(F32_VELU__AVX_RR2_LUT16_P3_X8, batch_div_8) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x8);
  const auto op = VUnOpMicrokernelTester::OpType::ELU;
  for (size_t n = 16; n < 80; n += 8) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, op);
  }
}
4518
// ELU tester runs at every batch size from 1 through 7.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X8, batch_lt_8) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x8);
  const auto op = VUnOpMicrokernelTester::OpType::ELU;
  for (size_t n = 1; n < 8; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, op);
  }
}
4527
// ELU tester runs at every batch size from 9 through 15.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X8, batch_gt_8) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x8);
  const auto op = VUnOpMicrokernelTester::OpType::ELU;
  for (size_t n = 9; n < 16; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, op);
  }
}
4536
// ELU tester runs with inplace(true) at batch sizes 1, 8, 15, 22, 29, 36.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X8, inplace) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x8);
  const auto op = VUnOpMicrokernelTester::OpType::ELU;
  for (size_t n = 1; n <= 40; n += 7) {
    VUnOpMicrokernelTester().batch_size(n).inplace(true).Test(ukernel, op);
  }
}
4546
// ELU tester runs with prescale 0.1 and 10.0, each at batch sizes 1, 8, 15, 22, 29, 36.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X8, prescale) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x8);
  const auto op = VUnOpMicrokernelTester::OpType::ELU;
  const std::vector<float> prescales{0.1f, 10.0f};
  for (const float scale : prescales) {
    for (size_t n = 1; n <= 40; n += 7) {
      VUnOpMicrokernelTester().batch_size(n).prescale(scale).Test(ukernel, op);
    }
  }
}
4558
// ELU tester runs with alpha 0.3 and 3.0, each at batch sizes 1, 8, 15, 22, 29, 36.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X8, alpha) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x8);
  const auto op = VUnOpMicrokernelTester::OpType::ELU;
  const std::vector<float> alphas{0.3f, 3.0f};
  for (const float a : alphas) {
    for (size_t n = 1; n <= 40; n += 7) {
      VUnOpMicrokernelTester().batch_size(n).alpha(a).Test(ukernel, op);
    }
  }
}
4570
// ELU tester runs with beta 0.3 and 3.0, each at batch sizes 1, 8, 15, 22, 29, 36.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X8, beta) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x8);
  const auto op = VUnOpMicrokernelTester::OpType::ELU;
  const std::vector<float> betas{0.3f, 3.0f};
  for (const float b : betas) {
    for (size_t n = 1; n <= 40; n += 7) {
      VUnOpMicrokernelTester().batch_size(n).beta(b).Test(ukernel, op);
    }
  }
}
4582 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
4583
4584
4585 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single ELU tester run at batch size 16.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X16, batch_eq_16) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x16);
  const auto op = VUnOpMicrokernelTester::OpType::ELU;
  VUnOpMicrokernelTester().batch_size(16).Test(ukernel, op);
}
4592
// Runs the ELU kernel on multiples of 16 from 32 up to, but not including, 160.
// Gated by TEST_REQUIRES_X86_AVX.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X16, batch_div_16) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x16);
  for (size_t n = 32; n < 160; n += 16) {
    VUnOpMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
4601
// Runs the ELU kernel on every batch size from 1 through 15.
// Gated by TEST_REQUIRES_X86_AVX.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X16, batch_lt_16) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x16);
  for (size_t n = 1; n < 16; ++n) {
    VUnOpMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
4610
// Runs the ELU kernel on every batch size from 17 through 31.
// Gated by TEST_REQUIRES_X86_AVX.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X16, batch_gt_16) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x16);
  for (size_t n = 17; n < 32; ++n) {
    VUnOpMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
4619
// Runs the ELU kernel with the tester's inplace flag enabled, for batch sizes
// from 1 up to 80 in steps of 15. Gated by TEST_REQUIRES_X86_AVX.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X16, inplace) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x16);
  for (size_t n = 1; n <= 80; n += 15) {
    VUnOpMicrokernelTester tester;
    tester.batch_size(n).inplace(true);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
4629
// Runs the ELU kernel with prescale set to 0.1 and 10.0, for batch sizes
// from 1 up to 80 in steps of 15. Gated by TEST_REQUIRES_X86_AVX.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X16, prescale) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x16);
  const float prescale_values[] = {0.1f, 10.0f};
  for (const float value : prescale_values) {
    for (size_t n = 1; n <= 80; n += 15) {
      VUnOpMicrokernelTester tester;
      tester.batch_size(n).prescale(value);
      tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
4641
// Runs the ELU kernel with alpha set to 0.3 and 3.0, for batch sizes
// from 1 up to 80 in steps of 15. Gated by TEST_REQUIRES_X86_AVX.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X16, alpha) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x16);
  const float alpha_values[] = {0.3f, 3.0f};
  for (const float value : alpha_values) {
    for (size_t n = 1; n <= 80; n += 15) {
      VUnOpMicrokernelTester tester;
      tester.batch_size(n).alpha(value);
      tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
4653
// Runs the ELU kernel with beta set to 0.3 and 3.0, for batch sizes
// from 1 up to 80 in steps of 15. Gated by TEST_REQUIRES_X86_AVX.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X16, beta) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x16);
  const float beta_values[] = {0.3f, 3.0f};
  for (const float value : beta_values) {
    for (size_t n = 1; n <= 80; n += 15) {
      VUnOpMicrokernelTester tester;
      tester.batch_size(n).beta(value);
      tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
4665 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
4666
4667
4668 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the ELU kernel once with a batch size of 24.
// Gated by TEST_REQUIRES_X86_AVX.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X24, batch_eq_24) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x24);
  VUnOpMicrokernelTester tester;
  tester.batch_size(24);
  tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
}
4675
// Runs the ELU kernel on multiples of 24 from 48 up to, but not including, 240.
// Gated by TEST_REQUIRES_X86_AVX.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X24, batch_div_24) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x24);
  for (size_t n = 48; n < 240; n += 24) {
    VUnOpMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
4684
// Runs the ELU kernel on every batch size from 1 through 23.
// Gated by TEST_REQUIRES_X86_AVX.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X24, batch_lt_24) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x24);
  for (size_t n = 1; n < 24; ++n) {
    VUnOpMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
4693
// Runs the ELU kernel on every batch size from 25 through 47.
// Gated by TEST_REQUIRES_X86_AVX.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X24, batch_gt_24) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x24);
  for (size_t n = 25; n < 48; ++n) {
    VUnOpMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
4702
// Runs the ELU kernel with the tester's inplace flag enabled, for batch sizes
// from 1 up to 120 in steps of 23. Gated by TEST_REQUIRES_X86_AVX.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X24, inplace) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x24);
  for (size_t n = 1; n <= 120; n += 23) {
    VUnOpMicrokernelTester tester;
    tester.batch_size(n).inplace(true);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
4712
// Runs the ELU kernel with prescale set to 0.1 and 10.0, for batch sizes
// from 1 up to 120 in steps of 23. Gated by TEST_REQUIRES_X86_AVX.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X24, prescale) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x24);
  const float prescale_values[] = {0.1f, 10.0f};
  for (const float value : prescale_values) {
    for (size_t n = 1; n <= 120; n += 23) {
      VUnOpMicrokernelTester tester;
      tester.batch_size(n).prescale(value);
      tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
4724
// Runs the ELU kernel with alpha set to 0.3 and 3.0, for batch sizes
// from 1 up to 120 in steps of 23. Gated by TEST_REQUIRES_X86_AVX.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X24, alpha) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x24);
  const float alpha_values[] = {0.3f, 3.0f};
  for (const float value : alpha_values) {
    for (size_t n = 1; n <= 120; n += 23) {
      VUnOpMicrokernelTester tester;
      tester.batch_size(n).alpha(value);
      tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
4736
// Runs the ELU kernel with beta set to 0.3 and 3.0, for batch sizes
// from 1 up to 120 in steps of 23. Gated by TEST_REQUIRES_X86_AVX.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X24, beta) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x24);
  const float beta_values[] = {0.3f, 3.0f};
  for (const float value : beta_values) {
    for (size_t n = 1; n <= 120; n += 23) {
      VUnOpMicrokernelTester tester;
      tester.batch_size(n).beta(value);
      tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
4748 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
4749
4750
4751 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the ELU kernel once with a batch size of 32.
// Gated by TEST_REQUIRES_X86_AVX.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X32, batch_eq_32) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x32);
  VUnOpMicrokernelTester tester;
  tester.batch_size(32);
  tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
}
4758
// Runs the ELU kernel on multiples of 32 from 64 up to, but not including, 320.
// Gated by TEST_REQUIRES_X86_AVX.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X32, batch_div_32) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x32);
  for (size_t n = 64; n < 320; n += 32) {
    VUnOpMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
4767
// Runs the ELU kernel on every batch size from 1 through 31.
// Gated by TEST_REQUIRES_X86_AVX.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X32, batch_lt_32) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x32);
  for (size_t n = 1; n < 32; ++n) {
    VUnOpMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
4776
// Runs the ELU kernel on every batch size from 33 through 63.
// Gated by TEST_REQUIRES_X86_AVX.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X32, batch_gt_32) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x32);
  for (size_t n = 33; n < 64; ++n) {
    VUnOpMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
4785
// Runs the ELU kernel with the tester's inplace flag enabled, for batch sizes
// from 1 up to 160 in steps of 31. Gated by TEST_REQUIRES_X86_AVX.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X32, inplace) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x32);
  for (size_t n = 1; n <= 160; n += 31) {
    VUnOpMicrokernelTester tester;
    tester.batch_size(n).inplace(true);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
4795
// Runs the ELU kernel with prescale set to 0.1 and 10.0, for batch sizes
// from 1 up to 160 in steps of 31. Gated by TEST_REQUIRES_X86_AVX.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X32, prescale) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x32);
  const float prescale_values[] = {0.1f, 10.0f};
  for (const float value : prescale_values) {
    for (size_t n = 1; n <= 160; n += 31) {
      VUnOpMicrokernelTester tester;
      tester.batch_size(n).prescale(value);
      tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
4807
// Runs the ELU kernel with alpha set to 0.3 and 3.0, for batch sizes
// from 1 up to 160 in steps of 31. Gated by TEST_REQUIRES_X86_AVX.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X32, alpha) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x32);
  const float alpha_values[] = {0.3f, 3.0f};
  for (const float value : alpha_values) {
    for (size_t n = 1; n <= 160; n += 31) {
      VUnOpMicrokernelTester tester;
      tester.batch_size(n).alpha(value);
      tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
4819
// Runs the ELU kernel with beta set to 0.3 and 3.0, for batch sizes
// from 1 up to 160 in steps of 31. Gated by TEST_REQUIRES_X86_AVX.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X32, beta) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x32);
  const float beta_values[] = {0.3f, 3.0f};
  for (const float value : beta_values) {
    for (size_t n = 1; n <= 160; n += 31) {
      VUnOpMicrokernelTester tester;
      tester.batch_size(n).beta(value);
      tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
4831 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
4832
4833
4834 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the ELU kernel once with a batch size of 40.
// Gated by TEST_REQUIRES_X86_AVX.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X40, batch_eq_40) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x40);
  VUnOpMicrokernelTester tester;
  tester.batch_size(40);
  tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
}
4841
// Runs the ELU kernel on multiples of 40 from 80 up to, but not including, 400.
// Gated by TEST_REQUIRES_X86_AVX.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X40, batch_div_40) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x40);
  for (size_t n = 80; n < 400; n += 40) {
    VUnOpMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
4850
// Runs the ELU kernel on every batch size from 1 through 39.
// Gated by TEST_REQUIRES_X86_AVX.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X40, batch_lt_40) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x40);
  for (size_t n = 1; n < 40; ++n) {
    VUnOpMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
4859
// Runs the ELU kernel on every batch size from 41 through 79.
// Gated by TEST_REQUIRES_X86_AVX.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X40, batch_gt_40) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x40);
  for (size_t n = 41; n < 80; ++n) {
    VUnOpMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
4868
// Runs the ELU kernel with the tester's inplace flag enabled, for batch sizes
// from 1 up to 200 in steps of 39. Gated by TEST_REQUIRES_X86_AVX.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X40, inplace) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x40);
  for (size_t n = 1; n <= 200; n += 39) {
    VUnOpMicrokernelTester tester;
    tester.batch_size(n).inplace(true);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
4878
// Runs the ELU kernel with prescale set to 0.1 and 10.0, for batch sizes
// from 1 up to 200 in steps of 39. Gated by TEST_REQUIRES_X86_AVX.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X40, prescale) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x40);
  const float prescale_values[] = {0.1f, 10.0f};
  for (const float value : prescale_values) {
    for (size_t n = 1; n <= 200; n += 39) {
      VUnOpMicrokernelTester tester;
      tester.batch_size(n).prescale(value);
      tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
4890
// Runs the ELU kernel with alpha set to 0.3 and 3.0, for batch sizes
// from 1 up to 200 in steps of 39. Gated by TEST_REQUIRES_X86_AVX.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X40, alpha) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x40);
  const float alpha_values[] = {0.3f, 3.0f};
  for (const float value : alpha_values) {
    for (size_t n = 1; n <= 200; n += 39) {
      VUnOpMicrokernelTester tester;
      tester.batch_size(n).alpha(value);
      tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
4902
// Runs the ELU kernel with beta set to 0.3 and 3.0, for batch sizes
// from 1 up to 200 in steps of 39. Gated by TEST_REQUIRES_X86_AVX.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X40, beta) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x40);
  const float beta_values[] = {0.3f, 3.0f};
  for (const float value : beta_values) {
    for (size_t n = 1; n <= 200; n += 39) {
      VUnOpMicrokernelTester tester;
      tester.batch_size(n).beta(value);
      tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
4914 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
4915
4916
4917 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the ELU kernel once with a batch size of 48.
// Gated by TEST_REQUIRES_X86_AVX.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X48, batch_eq_48) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x48);
  VUnOpMicrokernelTester tester;
  tester.batch_size(48);
  tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
}
4924
// Runs the ELU kernel on multiples of 48 from 96 up to, but not including, 480.
// Gated by TEST_REQUIRES_X86_AVX.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X48, batch_div_48) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x48);
  for (size_t n = 96; n < 480; n += 48) {
    VUnOpMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
4933
// Runs the ELU kernel on every batch size from 1 through 47.
// Gated by TEST_REQUIRES_X86_AVX.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X48, batch_lt_48) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x48);
  for (size_t n = 1; n < 48; ++n) {
    VUnOpMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
4942
// Runs the ELU kernel on every batch size from 49 through 95.
// Gated by TEST_REQUIRES_X86_AVX.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X48, batch_gt_48) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x48);
  for (size_t n = 49; n < 96; ++n) {
    VUnOpMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
4951
// Runs the ELU kernel with the tester's inplace flag enabled, for batch sizes
// from 1 up to 240 in steps of 47. Gated by TEST_REQUIRES_X86_AVX.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X48, inplace) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x48);
  for (size_t n = 1; n <= 240; n += 47) {
    VUnOpMicrokernelTester tester;
    tester.batch_size(n).inplace(true);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
4961
// Runs the ELU kernel with prescale set to 0.1 and 10.0, for batch sizes
// from 1 up to 240 in steps of 47. Gated by TEST_REQUIRES_X86_AVX.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X48, prescale) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x48);
  const float prescale_values[] = {0.1f, 10.0f};
  for (const float value : prescale_values) {
    for (size_t n = 1; n <= 240; n += 47) {
      VUnOpMicrokernelTester tester;
      tester.batch_size(n).prescale(value);
      tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
4973
// Runs the ELU kernel with alpha set to 0.3 and 3.0, for batch sizes
// from 1 up to 240 in steps of 47. Gated by TEST_REQUIRES_X86_AVX.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X48, alpha) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x48);
  const float alpha_values[] = {0.3f, 3.0f};
  for (const float value : alpha_values) {
    for (size_t n = 1; n <= 240; n += 47) {
      VUnOpMicrokernelTester tester;
      tester.batch_size(n).alpha(value);
      tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
4985
// Runs the ELU kernel with beta set to 0.3 and 3.0, for batch sizes
// from 1 up to 240 in steps of 47. Gated by TEST_REQUIRES_X86_AVX.
TEST(F32_VELU__AVX_RR2_LUT16_P3_X48, beta) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_lut16_p3_x48);
  const float beta_values[] = {0.3f, 3.0f};
  for (const float value : beta_values) {
    for (size_t n = 1; n <= 240; n += 47) {
      VUnOpMicrokernelTester tester;
      tester.batch_size(n).beta(value);
      tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
4997 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
4998
4999
5000 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the ELU kernel once with a batch size of 8.
// Gated by TEST_REQUIRES_X86_AVX.
TEST(F32_VELU__AVX_RR2_P6_X8, batch_eq_8) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_p6_x8);
  VUnOpMicrokernelTester tester;
  tester.batch_size(8);
  tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
}
5007
// Runs the ELU kernel on multiples of 8 from 16 up to, but not including, 80.
// Gated by TEST_REQUIRES_X86_AVX.
TEST(F32_VELU__AVX_RR2_P6_X8, batch_div_8) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_p6_x8);
  for (size_t n = 16; n < 80; n += 8) {
    VUnOpMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
5016
// Runs the ELU kernel on every batch size from 1 through 7.
// Gated by TEST_REQUIRES_X86_AVX.
TEST(F32_VELU__AVX_RR2_P6_X8, batch_lt_8) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_p6_x8);
  for (size_t n = 1; n < 8; ++n) {
    VUnOpMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
5025
// Runs the ELU kernel on every batch size from 9 through 15.
// Gated by TEST_REQUIRES_X86_AVX.
TEST(F32_VELU__AVX_RR2_P6_X8, batch_gt_8) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_p6_x8);
  for (size_t n = 9; n < 16; ++n) {
    VUnOpMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
5034
// Runs the ELU kernel with the tester's inplace flag enabled, for batch sizes
// from 1 up to 40 in steps of 7. Gated by TEST_REQUIRES_X86_AVX.
TEST(F32_VELU__AVX_RR2_P6_X8, inplace) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_p6_x8);
  for (size_t n = 1; n <= 40; n += 7) {
    VUnOpMicrokernelTester tester;
    tester.batch_size(n).inplace(true);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
5044
// Runs the ELU kernel with prescale set to 0.1 and 10.0, for batch sizes
// from 1 up to 40 in steps of 7. Gated by TEST_REQUIRES_X86_AVX.
TEST(F32_VELU__AVX_RR2_P6_X8, prescale) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_p6_x8);
  const float prescale_values[] = {0.1f, 10.0f};
  for (const float value : prescale_values) {
    for (size_t n = 1; n <= 40; n += 7) {
      VUnOpMicrokernelTester tester;
      tester.batch_size(n).prescale(value);
      tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
5056
// Runs the ELU kernel with alpha set to 0.3 and 3.0, for batch sizes
// from 1 up to 40 in steps of 7. Gated by TEST_REQUIRES_X86_AVX.
TEST(F32_VELU__AVX_RR2_P6_X8, alpha) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_p6_x8);
  const float alpha_values[] = {0.3f, 3.0f};
  for (const float value : alpha_values) {
    for (size_t n = 1; n <= 40; n += 7) {
      VUnOpMicrokernelTester tester;
      tester.batch_size(n).alpha(value);
      tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
5068
// Runs the ELU kernel with beta set to 0.3 and 3.0, for batch sizes
// from 1 up to 40 in steps of 7. Gated by TEST_REQUIRES_X86_AVX.
TEST(F32_VELU__AVX_RR2_P6_X8, beta) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_p6_x8);
  const float beta_values[] = {0.3f, 3.0f};
  for (const float value : beta_values) {
    for (size_t n = 1; n <= 40; n += 7) {
      VUnOpMicrokernelTester tester;
      tester.batch_size(n).beta(value);
      tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
5080 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
5081
5082
5083 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the ELU kernel once with a batch size of 16.
// Gated by TEST_REQUIRES_X86_AVX.
TEST(F32_VELU__AVX_RR2_P6_X16, batch_eq_16) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_p6_x16);
  VUnOpMicrokernelTester tester;
  tester.batch_size(16);
  tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
}
5090
// Runs the ELU kernel on multiples of 16 from 32 up to, but not including, 160.
// Gated by TEST_REQUIRES_X86_AVX.
TEST(F32_VELU__AVX_RR2_P6_X16, batch_div_16) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_p6_x16);
  for (size_t n = 32; n < 160; n += 16) {
    VUnOpMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
5099
// Runs the ELU kernel on every batch size from 1 through 15.
// Gated by TEST_REQUIRES_X86_AVX.
TEST(F32_VELU__AVX_RR2_P6_X16, batch_lt_16) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_p6_x16);
  for (size_t n = 1; n < 16; ++n) {
    VUnOpMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
5108
// Runs the ELU kernel on every batch size from 17 through 31.
// Gated by TEST_REQUIRES_X86_AVX.
TEST(F32_VELU__AVX_RR2_P6_X16, batch_gt_16) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_p6_x16);
  for (size_t n = 17; n < 32; ++n) {
    VUnOpMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
5117
// Runs the ELU kernel with the tester's inplace flag enabled, for batch sizes
// from 1 up to 80 in steps of 15. Gated by TEST_REQUIRES_X86_AVX.
TEST(F32_VELU__AVX_RR2_P6_X16, inplace) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_p6_x16);
  for (size_t n = 1; n <= 80; n += 15) {
    VUnOpMicrokernelTester tester;
    tester.batch_size(n).inplace(true);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
5127
// Runs the ELU kernel with prescale set to 0.1 and 10.0, for batch sizes
// from 1 up to 80 in steps of 15. Gated by TEST_REQUIRES_X86_AVX.
TEST(F32_VELU__AVX_RR2_P6_X16, prescale) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_p6_x16);
  const float prescale_values[] = {0.1f, 10.0f};
  for (const float value : prescale_values) {
    for (size_t n = 1; n <= 80; n += 15) {
      VUnOpMicrokernelTester tester;
      tester.batch_size(n).prescale(value);
      tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
5139
// Runs the ELU kernel with alpha set to 0.3 and 3.0, for batch sizes
// from 1 up to 80 in steps of 15. Gated by TEST_REQUIRES_X86_AVX.
TEST(F32_VELU__AVX_RR2_P6_X16, alpha) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_p6_x16);
  const float alpha_values[] = {0.3f, 3.0f};
  for (const float value : alpha_values) {
    for (size_t n = 1; n <= 80; n += 15) {
      VUnOpMicrokernelTester tester;
      tester.batch_size(n).alpha(value);
      tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
5151
// Runs the ELU kernel with beta set to 0.3 and 3.0, for batch sizes
// from 1 up to 80 in steps of 15. Gated by TEST_REQUIRES_X86_AVX.
TEST(F32_VELU__AVX_RR2_P6_X16, beta) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_p6_x16);
  const float beta_values[] = {0.3f, 3.0f};
  for (const float value : beta_values) {
    for (size_t n = 1; n <= 80; n += 15) {
      VUnOpMicrokernelTester tester;
      tester.batch_size(n).beta(value);
      tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
5163 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
5164
5165
5166 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the ELU kernel once with a batch size of 24.
// Gated by TEST_REQUIRES_X86_AVX.
TEST(F32_VELU__AVX_RR2_P6_X24, batch_eq_24) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_p6_x24);
  VUnOpMicrokernelTester tester;
  tester.batch_size(24);
  tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
}
5173
// Runs the ELU kernel on multiples of 24 from 48 up to, but not including, 240.
// Gated by TEST_REQUIRES_X86_AVX.
TEST(F32_VELU__AVX_RR2_P6_X24, batch_div_24) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_p6_x24);
  for (size_t n = 48; n < 240; n += 24) {
    VUnOpMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
5182
// Runs the ELU kernel on every batch size from 1 through 23.
// Gated by TEST_REQUIRES_X86_AVX.
TEST(F32_VELU__AVX_RR2_P6_X24, batch_lt_24) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_p6_x24);
  for (size_t n = 1; n < 24; ++n) {
    VUnOpMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
5191
// Runs the ELU kernel on every batch size from 25 through 47.
// Gated by TEST_REQUIRES_X86_AVX.
TEST(F32_VELU__AVX_RR2_P6_X24, batch_gt_24) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_p6_x24);
  for (size_t n = 25; n < 48; ++n) {
    VUnOpMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
5200
// Runs the ELU kernel with the tester's inplace flag enabled, for batch sizes
// from 1 up to 120 in steps of 23. Gated by TEST_REQUIRES_X86_AVX.
TEST(F32_VELU__AVX_RR2_P6_X24, inplace) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_p6_x24);
  for (size_t n = 1; n <= 120; n += 23) {
    VUnOpMicrokernelTester tester;
    tester.batch_size(n).inplace(true);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
5210
// Runs the ELU kernel with prescale set to 0.1 and 10.0, for batch sizes
// from 1 up to 120 in steps of 23. Gated by TEST_REQUIRES_X86_AVX.
TEST(F32_VELU__AVX_RR2_P6_X24, prescale) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_p6_x24);
  const float prescale_values[] = {0.1f, 10.0f};
  for (const float value : prescale_values) {
    for (size_t n = 1; n <= 120; n += 23) {
      VUnOpMicrokernelTester tester;
      tester.batch_size(n).prescale(value);
      tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
5222
// Runs the ELU kernel with alpha set to 0.3 and 3.0, for batch sizes
// from 1 up to 120 in steps of 23. Gated by TEST_REQUIRES_X86_AVX.
TEST(F32_VELU__AVX_RR2_P6_X24, alpha) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_p6_x24);
  const float alpha_values[] = {0.3f, 3.0f};
  for (const float value : alpha_values) {
    for (size_t n = 1; n <= 120; n += 23) {
      VUnOpMicrokernelTester tester;
      tester.batch_size(n).alpha(value);
      tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
5234
// Runs the ELU kernel with beta set to 0.3 and 3.0, for batch sizes
// from 1 up to 120 in steps of 23. Gated by TEST_REQUIRES_X86_AVX.
TEST(F32_VELU__AVX_RR2_P6_X24, beta) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_p6_x24);
  const float beta_values[] = {0.3f, 3.0f};
  for (const float value : beta_values) {
    for (size_t n = 1; n <= 120; n += 23) {
      VUnOpMicrokernelTester tester;
      tester.batch_size(n).beta(value);
      tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
5246 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
5247
5248
5249 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the ELU kernel once with a batch size of 32.
// Gated by TEST_REQUIRES_X86_AVX.
TEST(F32_VELU__AVX_RR2_P6_X32, batch_eq_32) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_p6_x32);
  VUnOpMicrokernelTester tester;
  tester.batch_size(32);
  tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
}
5256
// Runs the ELU kernel on multiples of 32 from 64 up to, but not including, 320.
// Gated by TEST_REQUIRES_X86_AVX.
TEST(F32_VELU__AVX_RR2_P6_X32, batch_div_32) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_p6_x32);
  for (size_t n = 64; n < 320; n += 32) {
    VUnOpMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
5265
// Runs the ELU kernel on every batch size from 1 through 31.
// Gated by TEST_REQUIRES_X86_AVX.
TEST(F32_VELU__AVX_RR2_P6_X32, batch_lt_32) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_p6_x32);
  for (size_t n = 1; n < 32; ++n) {
    VUnOpMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
5274
// Runs the ELU kernel on every batch size from 33 through 63.
// Gated by TEST_REQUIRES_X86_AVX.
TEST(F32_VELU__AVX_RR2_P6_X32, batch_gt_32) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_p6_x32);
  for (size_t n = 33; n < 64; ++n) {
    VUnOpMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
5283
// Runs the ELU kernel with the tester's inplace flag enabled, for batch sizes
// from 1 up to 160 in steps of 31. Gated by TEST_REQUIRES_X86_AVX.
TEST(F32_VELU__AVX_RR2_P6_X32, inplace) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_p6_x32);
  for (size_t n = 1; n <= 160; n += 31) {
    VUnOpMicrokernelTester tester;
    tester.batch_size(n).inplace(true);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
5293
// Runs the ELU kernel with prescale set to 0.1 and 10.0, for batch sizes
// from 1 up to 160 in steps of 31. Gated by TEST_REQUIRES_X86_AVX.
TEST(F32_VELU__AVX_RR2_P6_X32, prescale) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_p6_x32);
  const float prescale_values[] = {0.1f, 10.0f};
  for (const float value : prescale_values) {
    for (size_t n = 1; n <= 160; n += 31) {
      VUnOpMicrokernelTester tester;
      tester.batch_size(n).prescale(value);
      tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
5305
// Runs the ELU kernel with alpha set to 0.3 and 3.0, for batch sizes
// from 1 up to 160 in steps of 31. Gated by TEST_REQUIRES_X86_AVX.
TEST(F32_VELU__AVX_RR2_P6_X32, alpha) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_p6_x32);
  const float alpha_values[] = {0.3f, 3.0f};
  for (const float value : alpha_values) {
    for (size_t n = 1; n <= 160; n += 31) {
      VUnOpMicrokernelTester tester;
      tester.batch_size(n).alpha(value);
      tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
5317
// Runs the ELU kernel with beta set to 0.3 and 3.0, for batch sizes
// from 1 up to 160 in steps of 31. Gated by TEST_REQUIRES_X86_AVX.
TEST(F32_VELU__AVX_RR2_P6_X32, beta) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_p6_x32);
  const float beta_values[] = {0.3f, 3.0f};
  for (const float value : beta_values) {
    for (size_t n = 1; n <= 160; n += 31) {
      VUnOpMicrokernelTester tester;
      tester.batch_size(n).beta(value);
      tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
5329 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
5330
5331
5332 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the ELU kernel once with a batch size of 40.
// Gated by TEST_REQUIRES_X86_AVX.
TEST(F32_VELU__AVX_RR2_P6_X40, batch_eq_40) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_p6_x40);
  VUnOpMicrokernelTester tester;
  tester.batch_size(40);
  tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
}
5339
// Runs the ELU kernel on multiples of 40 from 80 up to, but not including, 400.
// Gated by TEST_REQUIRES_X86_AVX.
TEST(F32_VELU__AVX_RR2_P6_X40, batch_div_40) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_p6_x40);
  for (size_t n = 80; n < 400; n += 40) {
    VUnOpMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
5348
// Runs the ELU kernel on every batch size from 1 through 39.
// Gated by TEST_REQUIRES_X86_AVX.
TEST(F32_VELU__AVX_RR2_P6_X40, batch_lt_40) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_p6_x40);
  for (size_t n = 1; n < 40; ++n) {
    VUnOpMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
5357
// Runs the ELU kernel on every batch size from 41 through 79.
// Gated by TEST_REQUIRES_X86_AVX.
TEST(F32_VELU__AVX_RR2_P6_X40, batch_gt_40) {
  TEST_REQUIRES_X86_AVX;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_p6_x40);
  for (size_t n = 41; n < 80; ++n) {
    VUnOpMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
5366
// Runs the ELU check with the tester's inplace option enabled, over batch
// sizes starting at 1 and advancing by 39 while not exceeding 200.
TEST(F32_VELU__AVX_RR2_P6_X40, inplace) {
  TEST_REQUIRES_X86_AVX;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_p6_x40);
  for (size_t n = 1; n <= 200; n += 39) {
    VUnOpMicrokernelTester tester;
    tester.batch_size(n).inplace(true);
    tester.Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
5376
// Runs the ELU check with prescale set to 0.1 and then 10.0, each over batch
// sizes starting at 1 and advancing by 39 while not exceeding 200.
TEST(F32_VELU__AVX_RR2_P6_X40, prescale) {
  TEST_REQUIRES_X86_AVX;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_p6_x40);
  const std::vector<float> prescale_values({0.1f, 10.0f});
  for (const float value : prescale_values) {
    for (size_t n = 1; n <= 200; n += 39) {
      VUnOpMicrokernelTester tester;
      tester.batch_size(n).prescale(value);
      tester.Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
5388
// Runs the ELU check with alpha set to 0.3 and then 3.0, each over batch
// sizes starting at 1 and advancing by 39 while not exceeding 200.
TEST(F32_VELU__AVX_RR2_P6_X40, alpha) {
  TEST_REQUIRES_X86_AVX;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_p6_x40);
  const std::vector<float> alpha_values({0.3f, 3.0f});
  for (const float value : alpha_values) {
    for (size_t n = 1; n <= 200; n += 39) {
      VUnOpMicrokernelTester tester;
      tester.batch_size(n).alpha(value);
      tester.Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
5400
// Runs the ELU check with beta set to 0.3 and then 3.0, each over batch
// sizes starting at 1 and advancing by 39 while not exceeding 200.
TEST(F32_VELU__AVX_RR2_P6_X40, beta) {
  TEST_REQUIRES_X86_AVX;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_p6_x40);
  const std::vector<float> beta_values({0.3f, 3.0f});
  for (const float value : beta_values) {
    for (size_t n = 1; n <= 200; n += 39) {
      VUnOpMicrokernelTester tester;
      tester.batch_size(n).beta(value);
      tester.Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
5412 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
5413
5414
5415 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the ELU check once with batch_size fixed at 48.
TEST(F32_VELU__AVX_RR2_P6_X48, batch_eq_48) {
  TEST_REQUIRES_X86_AVX;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_p6_x48);
  VUnOpMicrokernelTester tester;
  tester.batch_size(48);
  tester.Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
}
5422
// Runs the ELU check for batch sizes 96, 144, ..., 432 (multiples of 48 below 480).
TEST(F32_VELU__AVX_RR2_P6_X48, batch_div_48) {
  TEST_REQUIRES_X86_AVX;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_p6_x48);
  for (size_t multiple = 2; multiple < 10; ++multiple) {
    VUnOpMicrokernelTester tester;
    tester.batch_size(multiple * 48);
    tester.Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
5431
// Runs the ELU check for every batch size from 1 through 47.
TEST(F32_VELU__AVX_RR2_P6_X48, batch_lt_48) {
  TEST_REQUIRES_X86_AVX;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_p6_x48);
  for (size_t n = 1; n < 48; ++n) {
    VUnOpMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
5440
// Runs the ELU check for every batch size from 49 through 95.
TEST(F32_VELU__AVX_RR2_P6_X48, batch_gt_48) {
  TEST_REQUIRES_X86_AVX;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_p6_x48);
  for (size_t n = 49; n < 96; ++n) {
    VUnOpMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
5449
// Runs the ELU check with the tester's inplace option enabled, over batch
// sizes starting at 1 and advancing by 47 while not exceeding 240.
TEST(F32_VELU__AVX_RR2_P6_X48, inplace) {
  TEST_REQUIRES_X86_AVX;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_p6_x48);
  for (size_t n = 1; n <= 240; n += 47) {
    VUnOpMicrokernelTester tester;
    tester.batch_size(n).inplace(true);
    tester.Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
5459
// Runs the ELU check with prescale set to 0.1 and then 10.0, each over batch
// sizes starting at 1 and advancing by 47 while not exceeding 240.
TEST(F32_VELU__AVX_RR2_P6_X48, prescale) {
  TEST_REQUIRES_X86_AVX;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_p6_x48);
  const std::vector<float> prescale_values({0.1f, 10.0f});
  for (const float value : prescale_values) {
    for (size_t n = 1; n <= 240; n += 47) {
      VUnOpMicrokernelTester tester;
      tester.batch_size(n).prescale(value);
      tester.Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
5471
// Runs the ELU check with alpha set to 0.3 and then 3.0, each over batch
// sizes starting at 1 and advancing by 47 while not exceeding 240.
TEST(F32_VELU__AVX_RR2_P6_X48, alpha) {
  TEST_REQUIRES_X86_AVX;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_p6_x48);
  const std::vector<float> alpha_values({0.3f, 3.0f});
  for (const float value : alpha_values) {
    for (size_t n = 1; n <= 240; n += 47) {
      VUnOpMicrokernelTester tester;
      tester.batch_size(n).alpha(value);
      tester.Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
5483
// Runs the ELU check with beta set to 0.3 and then 3.0, each over batch
// sizes starting at 1 and advancing by 47 while not exceeding 240.
TEST(F32_VELU__AVX_RR2_P6_X48, beta) {
  TEST_REQUIRES_X86_AVX;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx_rr2_p6_x48);
  const std::vector<float> beta_values({0.3f, 3.0f});
  for (const float value : beta_values) {
    for (size_t n = 1; n <= 240; n += 47) {
      VUnOpMicrokernelTester tester;
      tester.batch_size(n).beta(value);
      tester.Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
5495 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
5496
5497
5498 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the ELU check once with batch_size fixed at 8.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X8, batch_eq_8) {
  TEST_REQUIRES_X86_AVX2;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x8);
  VUnOpMicrokernelTester tester;
  tester.batch_size(8);
  tester.Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
}
5505
// Runs the ELU check for batch sizes 16, 24, ..., 72 (multiples of 8 below 80).
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X8, batch_div_8) {
  TEST_REQUIRES_X86_AVX2;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x8);
  for (size_t multiple = 2; multiple < 10; ++multiple) {
    VUnOpMicrokernelTester tester;
    tester.batch_size(multiple * 8);
    tester.Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
5514
// Runs the ELU check for every batch size from 1 through 7.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X8, batch_lt_8) {
  TEST_REQUIRES_X86_AVX2;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x8);
  for (size_t n = 1; n < 8; ++n) {
    VUnOpMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
5523
// Runs the ELU check for every batch size from 9 through 15.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X8, batch_gt_8) {
  TEST_REQUIRES_X86_AVX2;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x8);
  for (size_t n = 9; n < 16; ++n) {
    VUnOpMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
5532
// Runs the ELU check with the tester's inplace option enabled, over batch
// sizes starting at 1 and advancing by 7 while not exceeding 40.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X8, inplace) {
  TEST_REQUIRES_X86_AVX2;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x8);
  for (size_t n = 1; n <= 40; n += 7) {
    VUnOpMicrokernelTester tester;
    tester.batch_size(n).inplace(true);
    tester.Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
5542
// Runs the ELU check with prescale set to 0.1 and then 10.0, each over batch
// sizes starting at 1 and advancing by 7 while not exceeding 40.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X8, prescale) {
  TEST_REQUIRES_X86_AVX2;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x8);
  const std::vector<float> prescale_values({0.1f, 10.0f});
  for (const float value : prescale_values) {
    for (size_t n = 1; n <= 40; n += 7) {
      VUnOpMicrokernelTester tester;
      tester.batch_size(n).prescale(value);
      tester.Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
5554
// Runs the ELU check with alpha set to 0.3 and then 3.0, each over batch
// sizes starting at 1 and advancing by 7 while not exceeding 40.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X8, alpha) {
  TEST_REQUIRES_X86_AVX2;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x8);
  const std::vector<float> alpha_values({0.3f, 3.0f});
  for (const float value : alpha_values) {
    for (size_t n = 1; n <= 40; n += 7) {
      VUnOpMicrokernelTester tester;
      tester.batch_size(n).alpha(value);
      tester.Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
5566
// Runs the ELU check with beta set to 0.3 and then 3.0, each over batch
// sizes starting at 1 and advancing by 7 while not exceeding 40.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X8, beta) {
  TEST_REQUIRES_X86_AVX2;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x8);
  const std::vector<float> beta_values({0.3f, 3.0f});
  for (const float value : beta_values) {
    for (size_t n = 1; n <= 40; n += 7) {
      VUnOpMicrokernelTester tester;
      tester.batch_size(n).beta(value);
      tester.Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
5578 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
5579
5580
5581 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the ELU check once with batch_size fixed at 16.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X16, batch_eq_16) {
  TEST_REQUIRES_X86_AVX2;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x16);
  VUnOpMicrokernelTester tester;
  tester.batch_size(16);
  tester.Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
}
5588
// Runs the ELU check for batch sizes 32, 48, ..., 144 (multiples of 16 below 160).
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X16, batch_div_16) {
  TEST_REQUIRES_X86_AVX2;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x16);
  for (size_t multiple = 2; multiple < 10; ++multiple) {
    VUnOpMicrokernelTester tester;
    tester.batch_size(multiple * 16);
    tester.Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
5597
// Runs the ELU check for every batch size from 1 through 15.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X16, batch_lt_16) {
  TEST_REQUIRES_X86_AVX2;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x16);
  for (size_t n = 1; n < 16; ++n) {
    VUnOpMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
5606
// Runs the ELU check for every batch size from 17 through 31.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X16, batch_gt_16) {
  TEST_REQUIRES_X86_AVX2;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x16);
  for (size_t n = 17; n < 32; ++n) {
    VUnOpMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
5615
// Runs the ELU check with the tester's inplace option enabled, over batch
// sizes starting at 1 and advancing by 15 while not exceeding 80.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X16, inplace) {
  TEST_REQUIRES_X86_AVX2;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x16);
  for (size_t n = 1; n <= 80; n += 15) {
    VUnOpMicrokernelTester tester;
    tester.batch_size(n).inplace(true);
    tester.Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
5625
// Runs the ELU check with prescale set to 0.1 and then 10.0, each over batch
// sizes starting at 1 and advancing by 15 while not exceeding 80.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X16, prescale) {
  TEST_REQUIRES_X86_AVX2;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x16);
  const std::vector<float> prescale_values({0.1f, 10.0f});
  for (const float value : prescale_values) {
    for (size_t n = 1; n <= 80; n += 15) {
      VUnOpMicrokernelTester tester;
      tester.batch_size(n).prescale(value);
      tester.Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
5637
// Runs the ELU check with alpha set to 0.3 and then 3.0, each over batch
// sizes starting at 1 and advancing by 15 while not exceeding 80.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X16, alpha) {
  TEST_REQUIRES_X86_AVX2;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x16);
  const std::vector<float> alpha_values({0.3f, 3.0f});
  for (const float value : alpha_values) {
    for (size_t n = 1; n <= 80; n += 15) {
      VUnOpMicrokernelTester tester;
      tester.batch_size(n).alpha(value);
      tester.Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
5649
// Runs the ELU check with beta set to 0.3 and then 3.0, each over batch
// sizes starting at 1 and advancing by 15 while not exceeding 80.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X16, beta) {
  TEST_REQUIRES_X86_AVX2;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x16);
  const std::vector<float> beta_values({0.3f, 3.0f});
  for (const float value : beta_values) {
    for (size_t n = 1; n <= 80; n += 15) {
      VUnOpMicrokernelTester tester;
      tester.batch_size(n).beta(value);
      tester.Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
5661 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
5662
5663
5664 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the ELU check once with batch_size fixed at 24.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X24, batch_eq_24) {
  TEST_REQUIRES_X86_AVX2;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x24);
  VUnOpMicrokernelTester tester;
  tester.batch_size(24);
  tester.Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
}
5671
// Runs the ELU check for batch sizes 48, 72, ..., 216 (multiples of 24 below 240).
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X24, batch_div_24) {
  TEST_REQUIRES_X86_AVX2;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x24);
  for (size_t multiple = 2; multiple < 10; ++multiple) {
    VUnOpMicrokernelTester tester;
    tester.batch_size(multiple * 24);
    tester.Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
5680
// Runs the ELU check for every batch size from 1 through 23.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X24, batch_lt_24) {
  TEST_REQUIRES_X86_AVX2;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x24);
  for (size_t n = 1; n < 24; ++n) {
    VUnOpMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
5689
// Runs the ELU check for every batch size from 25 through 47.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X24, batch_gt_24) {
  TEST_REQUIRES_X86_AVX2;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x24);
  for (size_t n = 25; n < 48; ++n) {
    VUnOpMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
5698
// Runs the ELU check with the tester's inplace option enabled, over batch
// sizes starting at 1 and advancing by 23 while not exceeding 120.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X24, inplace) {
  TEST_REQUIRES_X86_AVX2;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x24);
  for (size_t n = 1; n <= 120; n += 23) {
    VUnOpMicrokernelTester tester;
    tester.batch_size(n).inplace(true);
    tester.Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
5708
// Runs the ELU check with prescale set to 0.1 and then 10.0, each over batch
// sizes starting at 1 and advancing by 23 while not exceeding 120.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X24, prescale) {
  TEST_REQUIRES_X86_AVX2;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x24);
  const std::vector<float> prescale_values({0.1f, 10.0f});
  for (const float value : prescale_values) {
    for (size_t n = 1; n <= 120; n += 23) {
      VUnOpMicrokernelTester tester;
      tester.batch_size(n).prescale(value);
      tester.Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
5720
// Runs the ELU check with alpha set to 0.3 and then 3.0, each over batch
// sizes starting at 1 and advancing by 23 while not exceeding 120.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X24, alpha) {
  TEST_REQUIRES_X86_AVX2;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x24);
  const std::vector<float> alpha_values({0.3f, 3.0f});
  for (const float value : alpha_values) {
    for (size_t n = 1; n <= 120; n += 23) {
      VUnOpMicrokernelTester tester;
      tester.batch_size(n).alpha(value);
      tester.Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
5732
// Runs the ELU check with beta set to 0.3 and then 3.0, each over batch
// sizes starting at 1 and advancing by 23 while not exceeding 120.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X24, beta) {
  TEST_REQUIRES_X86_AVX2;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x24);
  const std::vector<float> beta_values({0.3f, 3.0f});
  for (const float value : beta_values) {
    for (size_t n = 1; n <= 120; n += 23) {
      VUnOpMicrokernelTester tester;
      tester.batch_size(n).beta(value);
      tester.Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
5744 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
5745
5746
5747 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the ELU check once with batch_size fixed at 32.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X32, batch_eq_32) {
  TEST_REQUIRES_X86_AVX2;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x32);
  VUnOpMicrokernelTester tester;
  tester.batch_size(32);
  tester.Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
}
5754
// Runs the ELU check for batch sizes 64, 96, ..., 288 (multiples of 32 below 320).
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X32, batch_div_32) {
  TEST_REQUIRES_X86_AVX2;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x32);
  for (size_t multiple = 2; multiple < 10; ++multiple) {
    VUnOpMicrokernelTester tester;
    tester.batch_size(multiple * 32);
    tester.Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
5763
// Runs the ELU check for every batch size from 1 through 31.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X32, batch_lt_32) {
  TEST_REQUIRES_X86_AVX2;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x32);
  for (size_t n = 1; n < 32; ++n) {
    VUnOpMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
5772
// Runs the ELU check for every batch size from 33 through 63.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X32, batch_gt_32) {
  TEST_REQUIRES_X86_AVX2;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x32);
  for (size_t n = 33; n < 64; ++n) {
    VUnOpMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
5781
// Runs the ELU check with the tester's inplace option enabled, over batch
// sizes starting at 1 and advancing by 31 while not exceeding 160.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X32, inplace) {
  TEST_REQUIRES_X86_AVX2;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x32);
  for (size_t n = 1; n <= 160; n += 31) {
    VUnOpMicrokernelTester tester;
    tester.batch_size(n).inplace(true);
    tester.Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
5791
// Runs the ELU check with prescale set to 0.1 and then 10.0, each over batch
// sizes starting at 1 and advancing by 31 while not exceeding 160.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X32, prescale) {
  TEST_REQUIRES_X86_AVX2;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x32);
  const std::vector<float> prescale_values({0.1f, 10.0f});
  for (const float value : prescale_values) {
    for (size_t n = 1; n <= 160; n += 31) {
      VUnOpMicrokernelTester tester;
      tester.batch_size(n).prescale(value);
      tester.Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
5803
// Runs the ELU check with alpha set to 0.3 and then 3.0, each over batch
// sizes starting at 1 and advancing by 31 while not exceeding 160.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X32, alpha) {
  TEST_REQUIRES_X86_AVX2;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x32);
  const std::vector<float> alpha_values({0.3f, 3.0f});
  for (const float value : alpha_values) {
    for (size_t n = 1; n <= 160; n += 31) {
      VUnOpMicrokernelTester tester;
      tester.batch_size(n).alpha(value);
      tester.Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
5815
// Runs the ELU check with beta set to 0.3 and then 3.0, each over batch
// sizes starting at 1 and advancing by 31 while not exceeding 160.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X32, beta) {
  TEST_REQUIRES_X86_AVX2;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x32);
  const std::vector<float> beta_values({0.3f, 3.0f});
  for (const float value : beta_values) {
    for (size_t n = 1; n <= 160; n += 31) {
      VUnOpMicrokernelTester tester;
      tester.batch_size(n).beta(value);
      tester.Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
5827 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
5828
5829
5830 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the ELU check once with batch_size fixed at 40.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X40, batch_eq_40) {
  TEST_REQUIRES_X86_AVX2;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x40);
  VUnOpMicrokernelTester tester;
  tester.batch_size(40);
  tester.Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
}
5837
// Runs the ELU check for batch sizes 80, 120, ..., 360 (multiples of 40 below 400).
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X40, batch_div_40) {
  TEST_REQUIRES_X86_AVX2;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x40);
  for (size_t multiple = 2; multiple < 10; ++multiple) {
    VUnOpMicrokernelTester tester;
    tester.batch_size(multiple * 40);
    tester.Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
5846
// Runs the ELU check for every batch size from 1 through 39.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X40, batch_lt_40) {
  TEST_REQUIRES_X86_AVX2;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x40);
  for (size_t n = 1; n < 40; ++n) {
    VUnOpMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
5855
// Runs the ELU check for every batch size from 41 through 79.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X40, batch_gt_40) {
  TEST_REQUIRES_X86_AVX2;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x40);
  for (size_t n = 41; n < 80; ++n) {
    VUnOpMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
5864
// Runs the ELU check with the tester's inplace option enabled, over batch
// sizes starting at 1 and advancing by 39 while not exceeding 200.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X40, inplace) {
  TEST_REQUIRES_X86_AVX2;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x40);
  for (size_t n = 1; n <= 200; n += 39) {
    VUnOpMicrokernelTester tester;
    tester.batch_size(n).inplace(true);
    tester.Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
5874
// Runs the ELU check with prescale set to 0.1 and then 10.0, each over batch
// sizes starting at 1 and advancing by 39 while not exceeding 200.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X40, prescale) {
  TEST_REQUIRES_X86_AVX2;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x40);
  const std::vector<float> prescale_values({0.1f, 10.0f});
  for (const float value : prescale_values) {
    for (size_t n = 1; n <= 200; n += 39) {
      VUnOpMicrokernelTester tester;
      tester.batch_size(n).prescale(value);
      tester.Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
5886
// Runs the ELU check with alpha set to 0.3 and then 3.0, each over batch
// sizes starting at 1 and advancing by 39 while not exceeding 200.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X40, alpha) {
  TEST_REQUIRES_X86_AVX2;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x40);
  const std::vector<float> alpha_values({0.3f, 3.0f});
  for (const float value : alpha_values) {
    for (size_t n = 1; n <= 200; n += 39) {
      VUnOpMicrokernelTester tester;
      tester.batch_size(n).alpha(value);
      tester.Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
5898
// Runs the ELU check with beta set to 0.3 and then 3.0, each over batch
// sizes starting at 1 and advancing by 39 while not exceeding 200.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X40, beta) {
  TEST_REQUIRES_X86_AVX2;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x40);
  const std::vector<float> beta_values({0.3f, 3.0f});
  for (const float value : beta_values) {
    for (size_t n = 1; n <= 200; n += 39) {
      VUnOpMicrokernelTester tester;
      tester.batch_size(n).beta(value);
      tester.Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
5910 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
5911
5912
5913 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the ELU check once with batch_size fixed at 48.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X48, batch_eq_48) {
  TEST_REQUIRES_X86_AVX2;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x48);
  VUnOpMicrokernelTester tester;
  tester.batch_size(48);
  tester.Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
}
5920
// Runs the ELU check for batch sizes 96, 144, ..., 432 (multiples of 48 below 480).
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X48, batch_div_48) {
  TEST_REQUIRES_X86_AVX2;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x48);
  for (size_t multiple = 2; multiple < 10; ++multiple) {
    VUnOpMicrokernelTester tester;
    tester.batch_size(multiple * 48);
    tester.Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
5929
// Runs the ELU check for every batch size from 1 through 47.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X48, batch_lt_48) {
  TEST_REQUIRES_X86_AVX2;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x48);
  for (size_t n = 1; n < 48; ++n) {
    VUnOpMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
5938
// Runs the ELU check for every batch size from 49 through 95.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X48, batch_gt_48) {
  TEST_REQUIRES_X86_AVX2;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x48);
  for (size_t n = 49; n < 96; ++n) {
    VUnOpMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
5947
// Runs the ELU check with the tester's inplace option enabled, over batch
// sizes starting at 1 and advancing by 47 while not exceeding 240.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X48, inplace) {
  TEST_REQUIRES_X86_AVX2;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x48);
  for (size_t n = 1; n <= 240; n += 47) {
    VUnOpMicrokernelTester tester;
    tester.batch_size(n).inplace(true);
    tester.Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
5957
// Runs the ELU check with prescale set to 0.1 and then 10.0, each over batch
// sizes starting at 1 and advancing by 47 while not exceeding 240.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X48, prescale) {
  TEST_REQUIRES_X86_AVX2;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x48);
  const std::vector<float> prescale_values({0.1f, 10.0f});
  for (const float value : prescale_values) {
    for (size_t n = 1; n <= 240; n += 47) {
      VUnOpMicrokernelTester tester;
      tester.batch_size(n).prescale(value);
      tester.Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
5969
// Runs the ELU check with alpha set to 0.3 and then 3.0, each over batch
// sizes starting at 1 and advancing by 47 while not exceeding 240.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X48, alpha) {
  TEST_REQUIRES_X86_AVX2;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x48);
  const std::vector<float> alpha_values({0.3f, 3.0f});
  for (const float value : alpha_values) {
    for (size_t n = 1; n <= 240; n += 47) {
      VUnOpMicrokernelTester tester;
      tester.batch_size(n).alpha(value);
      tester.Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
5981
// Runs the ELU check with beta set to 0.3 and then 3.0, each over batch
// sizes starting at 1 and advancing by 47 while not exceeding 240.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X48, beta) {
  TEST_REQUIRES_X86_AVX2;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x48);
  const std::vector<float> beta_values({0.3f, 3.0f});
  for (const float value : beta_values) {
    for (size_t n = 1; n <= 240; n += 47) {
      VUnOpMicrokernelTester tester;
      tester.batch_size(n).beta(value);
      tester.Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
5993 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
5994
5995
5996 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the ELU check once with batch_size fixed at 56.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X56, batch_eq_56) {
  TEST_REQUIRES_X86_AVX2;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x56);
  VUnOpMicrokernelTester tester;
  tester.batch_size(56);
  tester.Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
}
6003
// Runs the ELU check for batch sizes 112, 168, ..., 504 (multiples of 56 below 560).
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X56, batch_div_56) {
  TEST_REQUIRES_X86_AVX2;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x56);
  for (size_t multiple = 2; multiple < 10; ++multiple) {
    VUnOpMicrokernelTester tester;
    tester.batch_size(multiple * 56);
    tester.Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
6012
// Runs the ELU check for every batch size from 1 through 55.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X56, batch_lt_56) {
  TEST_REQUIRES_X86_AVX2;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x56);
  for (size_t n = 1; n < 56; ++n) {
    VUnOpMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
6021
// Runs the ELU check for every batch size from 57 through 111.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X56, batch_gt_56) {
  TEST_REQUIRES_X86_AVX2;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x56);
  for (size_t n = 57; n < 112; ++n) {
    VUnOpMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
6030
// Runs the ELU check with the tester's inplace option enabled, over batch
// sizes starting at 1 and advancing by 55 while not exceeding 280.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X56, inplace) {
  TEST_REQUIRES_X86_AVX2;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x56);
  for (size_t n = 1; n <= 280; n += 55) {
    VUnOpMicrokernelTester tester;
    tester.batch_size(n).inplace(true);
    tester.Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
6040
// Runs the ELU check with prescale set to 0.1 and then 10.0, each over batch
// sizes starting at 1 and advancing by 55 while not exceeding 280.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X56, prescale) {
  TEST_REQUIRES_X86_AVX2;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x56);
  const std::vector<float> prescale_values({0.1f, 10.0f});
  for (const float value : prescale_values) {
    for (size_t n = 1; n <= 280; n += 55) {
      VUnOpMicrokernelTester tester;
      tester.batch_size(n).prescale(value);
      tester.Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
6052
// Runs the ELU check with alpha set to 0.3 and then 3.0, each over batch
// sizes starting at 1 and advancing by 55 while not exceeding 280.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X56, alpha) {
  TEST_REQUIRES_X86_AVX2;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x56);
  const std::vector<float> alpha_values({0.3f, 3.0f});
  for (const float value : alpha_values) {
    for (size_t n = 1; n <= 280; n += 55) {
      VUnOpMicrokernelTester tester;
      tester.batch_size(n).alpha(value);
      tester.Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
6064
// Runs the ELU check with beta set to 0.3 and then 3.0, each over batch
// sizes starting at 1 and advancing by 55 while not exceeding 280.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X56, beta) {
  TEST_REQUIRES_X86_AVX2;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x56);
  const std::vector<float> beta_values({0.3f, 3.0f});
  for (const float value : beta_values) {
    for (size_t n = 1; n <= 280; n += 55) {
      VUnOpMicrokernelTester tester;
      tester.batch_size(n).beta(value);
      tester.Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
6076 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
6077
6078
6079 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the ELU check once with batch_size fixed at 64.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X64, batch_eq_64) {
  TEST_REQUIRES_X86_AVX2;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x64);
  VUnOpMicrokernelTester tester;
  tester.batch_size(64);
  tester.Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
}
6086
// Runs the ELU check for batch sizes 128, 192, ..., 576 (multiples of 64 below 640).
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X64, batch_div_64) {
  TEST_REQUIRES_X86_AVX2;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x64);
  for (size_t multiple = 2; multiple < 10; ++multiple) {
    VUnOpMicrokernelTester tester;
    tester.batch_size(multiple * 64);
    tester.Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
6095
// Runs the ELU check for every batch size from 1 through 63.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X64, batch_lt_64) {
  TEST_REQUIRES_X86_AVX2;
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x64);
  for (size_t n = 1; n < 64; ++n) {
    VUnOpMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
6104
// Covers every batch size from 65 through 127.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X64, batch_gt_64) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x64);
  for (size_t n = 65; n < 128; ++n) {
    VUnOpMicrokernelTester tester{};
    tester.batch_size(n);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
6113
// Enables the tester's in-place option for batch sizes from 1 to 320 in steps of 63.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X64, inplace) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x64);
  for (size_t n = 1; n <= 320; n += 63) {
    VUnOpMicrokernelTester tester{};
    tester.batch_size(n);
    tester.inplace(true);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
6123
// Tries prescale values 0.1 and 10.0 across batch sizes from 1 to 320 in steps of 63.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X64, prescale) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x64);
  const std::vector<float> prescales = {0.1f, 10.0f};
  for (const float scale : prescales) {
    for (size_t n = 1; n <= 320; n += 63) {
      VUnOpMicrokernelTester tester{};
      tester.batch_size(n);
      tester.prescale(scale);
      tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
6135
// Tries alpha values 0.3 and 3.0 across batch sizes from 1 to 320 in steps of 63.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X64, alpha) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x64);
  const std::vector<float> alphas = {0.3f, 3.0f};
  for (const float a : alphas) {
    for (size_t n = 1; n <= 320; n += 63) {
      VUnOpMicrokernelTester tester{};
      tester.batch_size(n);
      tester.alpha(a);
      tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
6147
// Tries beta values 0.3 and 3.0 across batch sizes from 1 to 320 in steps of 63.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X64, beta) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x64);
  const std::vector<float> betas = {0.3f, 3.0f};
  for (const float b : betas) {
    for (size_t n = 1; n <= 320; n += 63) {
      VUnOpMicrokernelTester tester{};
      tester.batch_size(n);
      tester.beta(b);
      tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
6159 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
6160
6161
6162 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run of the ELU microkernel with a batch of exactly 72 elements.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X72, batch_eq_72) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x72);
  VUnOpMicrokernelTester tester{};
  tester.batch_size(72);
  tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
}
6169
// Sweeps batch sizes that are multiples of 72, from 144 up to (but excluding) 720.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X72, batch_div_72) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x72);
  for (size_t n = 144; n < 720; n += 72) {
    VUnOpMicrokernelTester tester{};
    tester.batch_size(n);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
6178
// Covers every batch size from 1 through 71, i.e. all sizes below 72.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X72, batch_lt_72) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x72);
  for (size_t n = 1; n < 72; ++n) {
    VUnOpMicrokernelTester tester{};
    tester.batch_size(n);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
6187
// Covers every batch size from 73 through 143.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X72, batch_gt_72) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x72);
  for (size_t n = 73; n < 144; ++n) {
    VUnOpMicrokernelTester tester{};
    tester.batch_size(n);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
6196
// Enables the tester's in-place option for batch sizes from 1 to 360 in steps of 71.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X72, inplace) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x72);
  for (size_t n = 1; n <= 360; n += 71) {
    VUnOpMicrokernelTester tester{};
    tester.batch_size(n);
    tester.inplace(true);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
6206
// Tries prescale values 0.1 and 10.0 across batch sizes from 1 to 360 in steps of 71.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X72, prescale) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x72);
  const std::vector<float> prescales = {0.1f, 10.0f};
  for (const float scale : prescales) {
    for (size_t n = 1; n <= 360; n += 71) {
      VUnOpMicrokernelTester tester{};
      tester.batch_size(n);
      tester.prescale(scale);
      tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
6218
// Tries alpha values 0.3 and 3.0 across batch sizes from 1 to 360 in steps of 71.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X72, alpha) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x72);
  const std::vector<float> alphas = {0.3f, 3.0f};
  for (const float a : alphas) {
    for (size_t n = 1; n <= 360; n += 71) {
      VUnOpMicrokernelTester tester{};
      tester.batch_size(n);
      tester.alpha(a);
      tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
6230
// Tries beta values 0.3 and 3.0 across batch sizes from 1 to 360 in steps of 71.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X72, beta) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x72);
  const std::vector<float> betas = {0.3f, 3.0f};
  for (const float b : betas) {
    for (size_t n = 1; n <= 360; n += 71) {
      VUnOpMicrokernelTester tester{};
      tester.batch_size(n);
      tester.beta(b);
      tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
6242 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
6243
6244
6245 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run of the ELU microkernel with a batch of exactly 80 elements.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X80, batch_eq_80) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x80);
  VUnOpMicrokernelTester tester{};
  tester.batch_size(80);
  tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
}
6252
// Sweeps batch sizes that are multiples of 80, from 160 up to (but excluding) 800.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X80, batch_div_80) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x80);
  for (size_t n = 160; n < 800; n += 80) {
    VUnOpMicrokernelTester tester{};
    tester.batch_size(n);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
6261
// Covers every batch size from 1 through 79, i.e. all sizes below 80.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X80, batch_lt_80) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x80);
  for (size_t n = 1; n < 80; ++n) {
    VUnOpMicrokernelTester tester{};
    tester.batch_size(n);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
6270
// Covers every batch size from 81 through 159.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X80, batch_gt_80) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x80);
  for (size_t n = 81; n < 160; ++n) {
    VUnOpMicrokernelTester tester{};
    tester.batch_size(n);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
6279
// Enables the tester's in-place option for batch sizes from 1 to 400 in steps of 79.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X80, inplace) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x80);
  for (size_t n = 1; n <= 400; n += 79) {
    VUnOpMicrokernelTester tester{};
    tester.batch_size(n);
    tester.inplace(true);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
6289
// Tries prescale values 0.1 and 10.0 across batch sizes from 1 to 400 in steps of 79.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X80, prescale) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x80);
  const std::vector<float> prescales = {0.1f, 10.0f};
  for (const float scale : prescales) {
    for (size_t n = 1; n <= 400; n += 79) {
      VUnOpMicrokernelTester tester{};
      tester.batch_size(n);
      tester.prescale(scale);
      tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
6301
// Tries alpha values 0.3 and 3.0 across batch sizes from 1 to 400 in steps of 79.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X80, alpha) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x80);
  const std::vector<float> alphas = {0.3f, 3.0f};
  for (const float a : alphas) {
    for (size_t n = 1; n <= 400; n += 79) {
      VUnOpMicrokernelTester tester{};
      tester.batch_size(n);
      tester.alpha(a);
      tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
6313
// Tries beta values 0.3 and 3.0 across batch sizes from 1 to 400 in steps of 79.
TEST(F32_VELU__AVX2_RR1_LUT4_P4_PERM_X80, beta) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x80);
  const std::vector<float> betas = {0.3f, 3.0f};
  for (const float b : betas) {
    for (size_t n = 1; n <= 400; n += 79) {
      VUnOpMicrokernelTester tester{};
      tester.batch_size(n);
      tester.beta(b);
      tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
6325 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
6326
6327
6328 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run of the ELU microkernel with a batch of exactly 8 elements.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X8, batch_eq_8) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x8);
  VUnOpMicrokernelTester tester{};
  tester.batch_size(8);
  tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
}
6335
// Sweeps batch sizes that are multiples of 8, from 16 up to (but excluding) 80.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X8, batch_div_8) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x8);
  for (size_t n = 16; n < 80; n += 8) {
    VUnOpMicrokernelTester tester{};
    tester.batch_size(n);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
6344
// Covers every batch size from 1 through 7, i.e. all sizes below 8.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X8, batch_lt_8) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x8);
  for (size_t n = 1; n < 8; ++n) {
    VUnOpMicrokernelTester tester{};
    tester.batch_size(n);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
6353
// Covers every batch size from 9 through 15.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X8, batch_gt_8) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x8);
  for (size_t n = 9; n < 16; ++n) {
    VUnOpMicrokernelTester tester{};
    tester.batch_size(n);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
6362
// Enables the tester's in-place option for batch sizes from 1 to 40 in steps of 7.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X8, inplace) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x8);
  for (size_t n = 1; n <= 40; n += 7) {
    VUnOpMicrokernelTester tester{};
    tester.batch_size(n);
    tester.inplace(true);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
6372
// Tries prescale values 0.1 and 10.0 across batch sizes from 1 to 40 in steps of 7.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X8, prescale) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x8);
  const std::vector<float> prescales = {0.1f, 10.0f};
  for (const float scale : prescales) {
    for (size_t n = 1; n <= 40; n += 7) {
      VUnOpMicrokernelTester tester{};
      tester.batch_size(n);
      tester.prescale(scale);
      tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
6384
// Tries alpha values 0.3 and 3.0 across batch sizes from 1 to 40 in steps of 7.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X8, alpha) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x8);
  const std::vector<float> alphas = {0.3f, 3.0f};
  for (const float a : alphas) {
    for (size_t n = 1; n <= 40; n += 7) {
      VUnOpMicrokernelTester tester{};
      tester.batch_size(n);
      tester.alpha(a);
      tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
6396
// Tries beta values 0.3 and 3.0 across batch sizes from 1 to 40 in steps of 7.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X8, beta) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x8);
  const std::vector<float> betas = {0.3f, 3.0f};
  for (const float b : betas) {
    for (size_t n = 1; n <= 40; n += 7) {
      VUnOpMicrokernelTester tester{};
      tester.batch_size(n);
      tester.beta(b);
      tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
6408 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
6409
6410
6411 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run of the ELU microkernel with a batch of exactly 16 elements.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X16, batch_eq_16) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x16);
  VUnOpMicrokernelTester tester{};
  tester.batch_size(16);
  tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
}
6418
// Sweeps batch sizes that are multiples of 16, from 32 up to (but excluding) 160.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X16, batch_div_16) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x16);
  for (size_t n = 32; n < 160; n += 16) {
    VUnOpMicrokernelTester tester{};
    tester.batch_size(n);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
6427
// Covers every batch size from 1 through 15, i.e. all sizes below 16.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X16, batch_lt_16) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x16);
  for (size_t n = 1; n < 16; ++n) {
    VUnOpMicrokernelTester tester{};
    tester.batch_size(n);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
6436
// Covers every batch size from 17 through 31.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X16, batch_gt_16) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x16);
  for (size_t n = 17; n < 32; ++n) {
    VUnOpMicrokernelTester tester{};
    tester.batch_size(n);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
6445
// Enables the tester's in-place option for batch sizes from 1 to 80 in steps of 15.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X16, inplace) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x16);
  for (size_t n = 1; n <= 80; n += 15) {
    VUnOpMicrokernelTester tester{};
    tester.batch_size(n);
    tester.inplace(true);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
6455
// Tries prescale values 0.1 and 10.0 across batch sizes from 1 to 80 in steps of 15.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X16, prescale) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x16);
  const std::vector<float> prescales = {0.1f, 10.0f};
  for (const float scale : prescales) {
    for (size_t n = 1; n <= 80; n += 15) {
      VUnOpMicrokernelTester tester{};
      tester.batch_size(n);
      tester.prescale(scale);
      tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
6467
// Tries alpha values 0.3 and 3.0 across batch sizes from 1 to 80 in steps of 15.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X16, alpha) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x16);
  const std::vector<float> alphas = {0.3f, 3.0f};
  for (const float a : alphas) {
    for (size_t n = 1; n <= 80; n += 15) {
      VUnOpMicrokernelTester tester{};
      tester.batch_size(n);
      tester.alpha(a);
      tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
6479
// Tries beta values 0.3 and 3.0 across batch sizes from 1 to 80 in steps of 15.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X16, beta) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x16);
  const std::vector<float> betas = {0.3f, 3.0f};
  for (const float b : betas) {
    for (size_t n = 1; n <= 80; n += 15) {
      VUnOpMicrokernelTester tester{};
      tester.batch_size(n);
      tester.beta(b);
      tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
6491 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
6492
6493
6494 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run of the ELU microkernel with a batch of exactly 24 elements.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X24, batch_eq_24) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x24);
  VUnOpMicrokernelTester tester{};
  tester.batch_size(24);
  tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
}
6501
// Sweeps batch sizes that are multiples of 24, from 48 up to (but excluding) 240.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X24, batch_div_24) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x24);
  for (size_t n = 48; n < 240; n += 24) {
    VUnOpMicrokernelTester tester{};
    tester.batch_size(n);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
6510
// Covers every batch size from 1 through 23, i.e. all sizes below 24.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X24, batch_lt_24) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x24);
  for (size_t n = 1; n < 24; ++n) {
    VUnOpMicrokernelTester tester{};
    tester.batch_size(n);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
6519
// Covers every batch size from 25 through 47.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X24, batch_gt_24) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x24);
  for (size_t n = 25; n < 48; ++n) {
    VUnOpMicrokernelTester tester{};
    tester.batch_size(n);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
6528
// Enables the tester's in-place option for batch sizes from 1 to 120 in steps of 23.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X24, inplace) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x24);
  for (size_t n = 1; n <= 120; n += 23) {
    VUnOpMicrokernelTester tester{};
    tester.batch_size(n);
    tester.inplace(true);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
6538
// Tries prescale values 0.1 and 10.0 across batch sizes from 1 to 120 in steps of 23.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X24, prescale) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x24);
  const std::vector<float> prescales = {0.1f, 10.0f};
  for (const float scale : prescales) {
    for (size_t n = 1; n <= 120; n += 23) {
      VUnOpMicrokernelTester tester{};
      tester.batch_size(n);
      tester.prescale(scale);
      tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
6550
// Tries alpha values 0.3 and 3.0 across batch sizes from 1 to 120 in steps of 23.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X24, alpha) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x24);
  const std::vector<float> alphas = {0.3f, 3.0f};
  for (const float a : alphas) {
    for (size_t n = 1; n <= 120; n += 23) {
      VUnOpMicrokernelTester tester{};
      tester.batch_size(n);
      tester.alpha(a);
      tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
6562
// Tries beta values 0.3 and 3.0 across batch sizes from 1 to 120 in steps of 23.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X24, beta) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x24);
  const std::vector<float> betas = {0.3f, 3.0f};
  for (const float b : betas) {
    for (size_t n = 1; n <= 120; n += 23) {
      VUnOpMicrokernelTester tester{};
      tester.batch_size(n);
      tester.beta(b);
      tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
6574 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
6575
6576
6577 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run of the ELU microkernel with a batch of exactly 32 elements.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X32, batch_eq_32) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x32);
  VUnOpMicrokernelTester tester{};
  tester.batch_size(32);
  tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
}
6584
// Sweeps batch sizes that are multiples of 32, from 64 up to (but excluding) 320.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X32, batch_div_32) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x32);
  for (size_t n = 64; n < 320; n += 32) {
    VUnOpMicrokernelTester tester{};
    tester.batch_size(n);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
6593
// Covers every batch size from 1 through 31, i.e. all sizes below 32.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X32, batch_lt_32) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x32);
  for (size_t n = 1; n < 32; ++n) {
    VUnOpMicrokernelTester tester{};
    tester.batch_size(n);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
6602
// Covers every batch size from 33 through 63.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X32, batch_gt_32) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x32);
  for (size_t n = 33; n < 64; ++n) {
    VUnOpMicrokernelTester tester{};
    tester.batch_size(n);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
6611
// Enables the tester's in-place option for batch sizes from 1 to 160 in steps of 31.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X32, inplace) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x32);
  for (size_t n = 1; n <= 160; n += 31) {
    VUnOpMicrokernelTester tester{};
    tester.batch_size(n);
    tester.inplace(true);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
6621
// Tries prescale values 0.1 and 10.0 across batch sizes from 1 to 160 in steps of 31.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X32, prescale) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x32);
  const std::vector<float> prescales = {0.1f, 10.0f};
  for (const float scale : prescales) {
    for (size_t n = 1; n <= 160; n += 31) {
      VUnOpMicrokernelTester tester{};
      tester.batch_size(n);
      tester.prescale(scale);
      tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
6633
// Tries alpha values 0.3 and 3.0 across batch sizes from 1 to 160 in steps of 31.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X32, alpha) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x32);
  const std::vector<float> alphas = {0.3f, 3.0f};
  for (const float a : alphas) {
    for (size_t n = 1; n <= 160; n += 31) {
      VUnOpMicrokernelTester tester{};
      tester.batch_size(n);
      tester.alpha(a);
      tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
6645
// Tries beta values 0.3 and 3.0 across batch sizes from 1 to 160 in steps of 31.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X32, beta) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x32);
  const std::vector<float> betas = {0.3f, 3.0f};
  for (const float b : betas) {
    for (size_t n = 1; n <= 160; n += 31) {
      VUnOpMicrokernelTester tester{};
      tester.batch_size(n);
      tester.beta(b);
      tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
6657 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
6658
6659
6660 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run of the ELU microkernel with a batch of exactly 40 elements.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X40, batch_eq_40) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x40);
  VUnOpMicrokernelTester tester{};
  tester.batch_size(40);
  tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
}
6667
// Sweeps batch sizes that are multiples of 40, from 80 up to (but excluding) 400.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X40, batch_div_40) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x40);
  for (size_t n = 80; n < 400; n += 40) {
    VUnOpMicrokernelTester tester{};
    tester.batch_size(n);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
6676
// Covers every batch size from 1 through 39, i.e. all sizes below 40.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X40, batch_lt_40) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x40);
  for (size_t n = 1; n < 40; ++n) {
    VUnOpMicrokernelTester tester{};
    tester.batch_size(n);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
6685
// Covers every batch size from 41 through 79.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X40, batch_gt_40) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x40);
  for (size_t n = 41; n < 80; ++n) {
    VUnOpMicrokernelTester tester{};
    tester.batch_size(n);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
6694
// Enables the tester's in-place option for batch sizes from 1 to 200 in steps of 39.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X40, inplace) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x40);
  for (size_t n = 1; n <= 200; n += 39) {
    VUnOpMicrokernelTester tester{};
    tester.batch_size(n);
    tester.inplace(true);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
6704
// Tries prescale values 0.1 and 10.0 across batch sizes from 1 to 200 in steps of 39.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X40, prescale) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x40);
  const std::vector<float> prescales = {0.1f, 10.0f};
  for (const float scale : prescales) {
    for (size_t n = 1; n <= 200; n += 39) {
      VUnOpMicrokernelTester tester{};
      tester.batch_size(n);
      tester.prescale(scale);
      tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
6716
// Tries alpha values 0.3 and 3.0 across batch sizes from 1 to 200 in steps of 39.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X40, alpha) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x40);
  const std::vector<float> alphas = {0.3f, 3.0f};
  for (const float a : alphas) {
    for (size_t n = 1; n <= 200; n += 39) {
      VUnOpMicrokernelTester tester{};
      tester.batch_size(n);
      tester.alpha(a);
      tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
6728
// Tries beta values 0.3 and 3.0 across batch sizes from 1 to 200 in steps of 39.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X40, beta) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x40);
  const std::vector<float> betas = {0.3f, 3.0f};
  for (const float b : betas) {
    for (size_t n = 1; n <= 200; n += 39) {
      VUnOpMicrokernelTester tester{};
      tester.batch_size(n);
      tester.beta(b);
      tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
6740 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
6741
6742
6743 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run of the ELU microkernel with a batch of exactly 48 elements.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X48, batch_eq_48) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x48);
  VUnOpMicrokernelTester tester{};
  tester.batch_size(48);
  tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
}
6750
// Sweeps batch sizes that are multiples of 48, from 96 up to (but excluding) 480.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X48, batch_div_48) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x48);
  for (size_t n = 96; n < 480; n += 48) {
    VUnOpMicrokernelTester tester{};
    tester.batch_size(n);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
6759
// Covers every batch size from 1 through 47, i.e. all sizes below 48.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X48, batch_lt_48) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x48);
  for (size_t n = 1; n < 48; ++n) {
    VUnOpMicrokernelTester tester{};
    tester.batch_size(n);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
6768
// Covers every batch size from 49 through 95.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X48, batch_gt_48) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x48);
  for (size_t n = 49; n < 96; ++n) {
    VUnOpMicrokernelTester tester{};
    tester.batch_size(n);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
6777
// Enables the tester's in-place option for batch sizes from 1 to 240 in steps of 47.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X48, inplace) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x48);
  for (size_t n = 1; n <= 240; n += 47) {
    VUnOpMicrokernelTester tester{};
    tester.batch_size(n);
    tester.inplace(true);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
6787
// Tries prescale values 0.1 and 10.0 across batch sizes from 1 to 240 in steps of 47.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X48, prescale) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x48);
  const std::vector<float> prescales = {0.1f, 10.0f};
  for (const float scale : prescales) {
    for (size_t n = 1; n <= 240; n += 47) {
      VUnOpMicrokernelTester tester{};
      tester.batch_size(n);
      tester.prescale(scale);
      tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
6799
// Tries alpha values 0.3 and 3.0 across batch sizes from 1 to 240 in steps of 47.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X48, alpha) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x48);
  const std::vector<float> alphas = {0.3f, 3.0f};
  for (const float a : alphas) {
    for (size_t n = 1; n <= 240; n += 47) {
      VUnOpMicrokernelTester tester{};
      tester.batch_size(n);
      tester.alpha(a);
      tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
6811
// Tries beta values 0.3 and 3.0 across batch sizes from 1 to 240 in steps of 47.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X48, beta) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x48);
  const std::vector<float> betas = {0.3f, 3.0f};
  for (const float b : betas) {
    for (size_t n = 1; n <= 240; n += 47) {
      VUnOpMicrokernelTester tester{};
      tester.batch_size(n);
      tester.beta(b);
      tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
6823 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
6824
6825
6826 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single run of the ELU microkernel with a batch of exactly 56 elements.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X56, batch_eq_56) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x56);
  VUnOpMicrokernelTester tester{};
  tester.batch_size(56);
  tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
}
6833
// Runs the ELU tester for batch sizes that are multiples 2 through 9 of 56.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X56, batch_div_56) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x56);
  for (size_t multiple = 2; multiple < 10; ++multiple) {
    VUnOpMicrokernelTester()
      .batch_size(multiple * 56)
      .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
6842
// Runs the ELU tester for every batch size from 1 through 55.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X56, batch_lt_56) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x56);
  for (size_t n = 1; n <= 55; ++n) {
    VUnOpMicrokernelTester()
      .batch_size(n)
      .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
6851
// Runs the ELU tester for every batch size from 57 through 111.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X56, batch_gt_56) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x56);
  for (size_t extra = 1; extra < 56; ++extra) {
    VUnOpMicrokernelTester()
      .batch_size(56 + extra)
      .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
6860
// Runs the ELU tester with inplace(true), stepping batch size from 1 by 55 while at most 280.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X56, inplace) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x56);
  size_t n = 1;
  while (n <= 280) {
    VUnOpMicrokernelTester()
      .batch_size(n)
      .inplace(true)
      .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    n += 55;
  }
}
6870
// Runs the ELU tester for each prescale in {0.1f, 10.0f}, stepping batch size from 1 by 55 while at most 280.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X56, prescale) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x56);
  const float prescale_values[] = {0.1f, 10.0f};
  for (const float prescale : prescale_values) {
    for (size_t n = 1; n <= 280; n += 55) {
      VUnOpMicrokernelTester()
        .batch_size(n)
        .prescale(prescale)
        .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
6882
// Runs the ELU tester for each alpha in {0.3f, 3.0f}, stepping batch size from 1 by 55 while at most 280.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X56, alpha) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x56);
  const float alpha_values[] = {0.3f, 3.0f};
  for (const float alpha : alpha_values) {
    for (size_t n = 1; n <= 280; n += 55) {
      VUnOpMicrokernelTester()
        .batch_size(n)
        .alpha(alpha)
        .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
6894
// Runs the ELU tester for each beta in {0.3f, 3.0f}, stepping batch size from 1 by 55 while at most 280.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X56, beta) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x56);
  const float beta_values[] = {0.3f, 3.0f};
  for (const float beta : beta_values) {
    for (size_t n = 1; n <= 280; n += 55) {
      VUnOpMicrokernelTester()
        .batch_size(n)
        .beta(beta)
        .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
6906 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
6907
6908
6909 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the ELU tester once with a batch size of 64.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X64, batch_eq_64) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x64);
  VUnOpMicrokernelTester()
    .batch_size(64)
    .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
}
6916
// Runs the ELU tester for batch sizes that are multiples 2 through 9 of 64.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X64, batch_div_64) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x64);
  for (size_t multiple = 2; multiple < 10; ++multiple) {
    VUnOpMicrokernelTester()
      .batch_size(multiple * 64)
      .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
6925
// Runs the ELU tester for every batch size from 1 through 63.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X64, batch_lt_64) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x64);
  for (size_t n = 1; n <= 63; ++n) {
    VUnOpMicrokernelTester()
      .batch_size(n)
      .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
6934
// Runs the ELU tester for every batch size from 65 through 127.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X64, batch_gt_64) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x64);
  for (size_t extra = 1; extra < 64; ++extra) {
    VUnOpMicrokernelTester()
      .batch_size(64 + extra)
      .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
6943
// Runs the ELU tester with inplace(true), stepping batch size from 1 by 63 while at most 320.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X64, inplace) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x64);
  size_t n = 1;
  while (n <= 320) {
    VUnOpMicrokernelTester()
      .batch_size(n)
      .inplace(true)
      .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    n += 63;
  }
}
6953
// Runs the ELU tester for each prescale in {0.1f, 10.0f}, stepping batch size from 1 by 63 while at most 320.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X64, prescale) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x64);
  const float prescale_values[] = {0.1f, 10.0f};
  for (const float prescale : prescale_values) {
    for (size_t n = 1; n <= 320; n += 63) {
      VUnOpMicrokernelTester()
        .batch_size(n)
        .prescale(prescale)
        .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
6965
// Runs the ELU tester for each alpha in {0.3f, 3.0f}, stepping batch size from 1 by 63 while at most 320.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X64, alpha) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x64);
  const float alpha_values[] = {0.3f, 3.0f};
  for (const float alpha : alpha_values) {
    for (size_t n = 1; n <= 320; n += 63) {
      VUnOpMicrokernelTester()
        .batch_size(n)
        .alpha(alpha)
        .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
6977
// Runs the ELU tester for each beta in {0.3f, 3.0f}, stepping batch size from 1 by 63 while at most 320.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X64, beta) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x64);
  const float beta_values[] = {0.3f, 3.0f};
  for (const float beta : beta_values) {
    for (size_t n = 1; n <= 320; n += 63) {
      VUnOpMicrokernelTester()
        .batch_size(n)
        .beta(beta)
        .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
6989 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
6990
6991
6992 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the ELU tester once with a batch size of 72.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X72, batch_eq_72) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x72);
  VUnOpMicrokernelTester()
    .batch_size(72)
    .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
}
6999
// Runs the ELU tester for batch sizes that are multiples 2 through 9 of 72.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X72, batch_div_72) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x72);
  for (size_t multiple = 2; multiple < 10; ++multiple) {
    VUnOpMicrokernelTester()
      .batch_size(multiple * 72)
      .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
7008
// Runs the ELU tester for every batch size from 1 through 71.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X72, batch_lt_72) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x72);
  for (size_t n = 1; n <= 71; ++n) {
    VUnOpMicrokernelTester()
      .batch_size(n)
      .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
7017
// Runs the ELU tester for every batch size from 73 through 143.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X72, batch_gt_72) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x72);
  for (size_t extra = 1; extra < 72; ++extra) {
    VUnOpMicrokernelTester()
      .batch_size(72 + extra)
      .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
7026
// Runs the ELU tester with inplace(true), stepping batch size from 1 by 71 while at most 360.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X72, inplace) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x72);
  size_t n = 1;
  while (n <= 360) {
    VUnOpMicrokernelTester()
      .batch_size(n)
      .inplace(true)
      .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    n += 71;
  }
}
7036
// Runs the ELU tester for each prescale in {0.1f, 10.0f}, stepping batch size from 1 by 71 while at most 360.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X72, prescale) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x72);
  const float prescale_values[] = {0.1f, 10.0f};
  for (const float prescale : prescale_values) {
    for (size_t n = 1; n <= 360; n += 71) {
      VUnOpMicrokernelTester()
        .batch_size(n)
        .prescale(prescale)
        .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
7048
// Runs the ELU tester for each alpha in {0.3f, 3.0f}, stepping batch size from 1 by 71 while at most 360.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X72, alpha) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x72);
  const float alpha_values[] = {0.3f, 3.0f};
  for (const float alpha : alpha_values) {
    for (size_t n = 1; n <= 360; n += 71) {
      VUnOpMicrokernelTester()
        .batch_size(n)
        .alpha(alpha)
        .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
7060
// Runs the ELU tester for each beta in {0.3f, 3.0f}, stepping batch size from 1 by 71 while at most 360.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X72, beta) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x72);
  const float beta_values[] = {0.3f, 3.0f};
  for (const float beta : beta_values) {
    for (size_t n = 1; n <= 360; n += 71) {
      VUnOpMicrokernelTester()
        .batch_size(n)
        .beta(beta)
        .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
7072 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
7073
7074
7075 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the ELU tester once with a batch size of 80.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X80, batch_eq_80) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x80);
  VUnOpMicrokernelTester()
    .batch_size(80)
    .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
}
7082
// Runs the ELU tester for batch sizes that are multiples 2 through 9 of 80.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X80, batch_div_80) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x80);
  for (size_t multiple = 2; multiple < 10; ++multiple) {
    VUnOpMicrokernelTester()
      .batch_size(multiple * 80)
      .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
7091
// Runs the ELU tester for every batch size from 1 through 79.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X80, batch_lt_80) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x80);
  for (size_t n = 1; n <= 79; ++n) {
    VUnOpMicrokernelTester()
      .batch_size(n)
      .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
7100
// Runs the ELU tester for every batch size from 81 through 159.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X80, batch_gt_80) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x80);
  for (size_t extra = 1; extra < 80; ++extra) {
    VUnOpMicrokernelTester()
      .batch_size(80 + extra)
      .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
7109
// Runs the ELU tester with inplace(true), stepping batch size from 1 by 79 while at most 400.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X80, inplace) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x80);
  size_t n = 1;
  while (n <= 400) {
    VUnOpMicrokernelTester()
      .batch_size(n)
      .inplace(true)
      .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    n += 79;
  }
}
7119
// Runs the ELU tester for each prescale in {0.1f, 10.0f}, stepping batch size from 1 by 79 while at most 400.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X80, prescale) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x80);
  const float prescale_values[] = {0.1f, 10.0f};
  for (const float prescale : prescale_values) {
    for (size_t n = 1; n <= 400; n += 79) {
      VUnOpMicrokernelTester()
        .batch_size(n)
        .prescale(prescale)
        .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
7131
// Runs the ELU tester for each alpha in {0.3f, 3.0f}, stepping batch size from 1 by 79 while at most 400.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X80, alpha) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x80);
  const float alpha_values[] = {0.3f, 3.0f};
  for (const float alpha : alpha_values) {
    for (size_t n = 1; n <= 400; n += 79) {
      VUnOpMicrokernelTester()
        .batch_size(n)
        .alpha(alpha)
        .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
7143
// Runs the ELU tester for each beta in {0.3f, 3.0f}, stepping batch size from 1 by 79 while at most 400.
TEST(F32_VELU__AVX2_RR1_LUT8_P4_PERM_X80, beta) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x80);
  const float beta_values[] = {0.3f, 3.0f};
  for (const float beta : beta_values) {
    for (size_t n = 1; n <= 400; n += 79) {
      VUnOpMicrokernelTester()
        .batch_size(n)
        .beta(beta)
        .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
7155 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
7156
7157
7158 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the ELU tester once with a batch size of 8.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X8, batch_eq_8) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x8);
  VUnOpMicrokernelTester()
    .batch_size(8)
    .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
}
7165
// Runs the ELU tester for batch sizes that are multiples 2 through 9 of 8.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X8, batch_div_8) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x8);
  for (size_t multiple = 2; multiple < 10; ++multiple) {
    VUnOpMicrokernelTester()
      .batch_size(multiple * 8)
      .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
7174
// Runs the ELU tester for every batch size from 1 through 7.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X8, batch_lt_8) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x8);
  for (size_t n = 1; n <= 7; ++n) {
    VUnOpMicrokernelTester()
      .batch_size(n)
      .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
7183
// Runs the ELU tester for every batch size from 9 through 15.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X8, batch_gt_8) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x8);
  for (size_t extra = 1; extra < 8; ++extra) {
    VUnOpMicrokernelTester()
      .batch_size(8 + extra)
      .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
7192
// Runs the ELU tester with inplace(true), stepping batch size from 1 by 7 while at most 40.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X8, inplace) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x8);
  size_t n = 1;
  while (n <= 40) {
    VUnOpMicrokernelTester()
      .batch_size(n)
      .inplace(true)
      .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    n += 7;
  }
}
7202
// Runs the ELU tester for each prescale in {0.1f, 10.0f}, stepping batch size from 1 by 7 while at most 40.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X8, prescale) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x8);
  const float prescale_values[] = {0.1f, 10.0f};
  for (const float prescale : prescale_values) {
    for (size_t n = 1; n <= 40; n += 7) {
      VUnOpMicrokernelTester()
        .batch_size(n)
        .prescale(prescale)
        .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
7214
// Runs the ELU tester for each alpha in {0.3f, 3.0f}, stepping batch size from 1 by 7 while at most 40.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X8, alpha) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x8);
  const float alpha_values[] = {0.3f, 3.0f};
  for (const float alpha : alpha_values) {
    for (size_t n = 1; n <= 40; n += 7) {
      VUnOpMicrokernelTester()
        .batch_size(n)
        .alpha(alpha)
        .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
7226
// Runs the ELU tester for each beta in {0.3f, 3.0f}, stepping batch size from 1 by 7 while at most 40.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X8, beta) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x8);
  const float beta_values[] = {0.3f, 3.0f};
  for (const float beta : beta_values) {
    for (size_t n = 1; n <= 40; n += 7) {
      VUnOpMicrokernelTester()
        .batch_size(n)
        .beta(beta)
        .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
7238 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
7239
7240
7241 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the ELU tester once with a batch size of 16.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X16, batch_eq_16) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x16);
  VUnOpMicrokernelTester()
    .batch_size(16)
    .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
}
7248
// Runs the ELU tester for batch sizes that are multiples 2 through 9 of 16.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X16, batch_div_16) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x16);
  for (size_t multiple = 2; multiple < 10; ++multiple) {
    VUnOpMicrokernelTester()
      .batch_size(multiple * 16)
      .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
7257
// Runs the ELU tester for every batch size from 1 through 15.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X16, batch_lt_16) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x16);
  for (size_t n = 1; n <= 15; ++n) {
    VUnOpMicrokernelTester()
      .batch_size(n)
      .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
7266
// Runs the ELU tester for every batch size from 17 through 31.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X16, batch_gt_16) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x16);
  for (size_t extra = 1; extra < 16; ++extra) {
    VUnOpMicrokernelTester()
      .batch_size(16 + extra)
      .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
7275
// Runs the ELU tester with inplace(true), stepping batch size from 1 by 15 while at most 80.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X16, inplace) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x16);
  size_t n = 1;
  while (n <= 80) {
    VUnOpMicrokernelTester()
      .batch_size(n)
      .inplace(true)
      .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    n += 15;
  }
}
7285
// Runs the ELU tester for each prescale in {0.1f, 10.0f}, stepping batch size from 1 by 15 while at most 80.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X16, prescale) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x16);
  const float prescale_values[] = {0.1f, 10.0f};
  for (const float prescale : prescale_values) {
    for (size_t n = 1; n <= 80; n += 15) {
      VUnOpMicrokernelTester()
        .batch_size(n)
        .prescale(prescale)
        .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
7297
// Runs the ELU tester for each alpha in {0.3f, 3.0f}, stepping batch size from 1 by 15 while at most 80.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X16, alpha) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x16);
  const float alpha_values[] = {0.3f, 3.0f};
  for (const float alpha : alpha_values) {
    for (size_t n = 1; n <= 80; n += 15) {
      VUnOpMicrokernelTester()
        .batch_size(n)
        .alpha(alpha)
        .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
7309
// Runs the ELU tester for each beta in {0.3f, 3.0f}, stepping batch size from 1 by 15 while at most 80.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X16, beta) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x16);
  const float beta_values[] = {0.3f, 3.0f};
  for (const float beta : beta_values) {
    for (size_t n = 1; n <= 80; n += 15) {
      VUnOpMicrokernelTester()
        .batch_size(n)
        .beta(beta)
        .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
7321 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
7322
7323
7324 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the ELU tester once with a batch size of 24.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X24, batch_eq_24) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x24);
  VUnOpMicrokernelTester()
    .batch_size(24)
    .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
}
7331
// Runs the ELU tester for batch sizes that are multiples 2 through 9 of 24.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X24, batch_div_24) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x24);
  for (size_t multiple = 2; multiple < 10; ++multiple) {
    VUnOpMicrokernelTester()
      .batch_size(multiple * 24)
      .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
7340
// Runs the ELU tester for every batch size from 1 through 23.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X24, batch_lt_24) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x24);
  for (size_t n = 1; n <= 23; ++n) {
    VUnOpMicrokernelTester()
      .batch_size(n)
      .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
7349
// Runs the ELU tester for every batch size from 25 through 47.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X24, batch_gt_24) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x24);
  for (size_t extra = 1; extra < 24; ++extra) {
    VUnOpMicrokernelTester()
      .batch_size(24 + extra)
      .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
7358
// Runs the ELU tester with inplace(true), stepping batch size from 1 by 23 while at most 120.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X24, inplace) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x24);
  size_t n = 1;
  while (n <= 120) {
    VUnOpMicrokernelTester()
      .batch_size(n)
      .inplace(true)
      .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    n += 23;
  }
}
7368
// Runs the ELU tester for each prescale in {0.1f, 10.0f}, stepping batch size from 1 by 23 while at most 120.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X24, prescale) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x24);
  const float prescale_values[] = {0.1f, 10.0f};
  for (const float prescale : prescale_values) {
    for (size_t n = 1; n <= 120; n += 23) {
      VUnOpMicrokernelTester()
        .batch_size(n)
        .prescale(prescale)
        .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
7380
// Runs the ELU tester for each alpha in {0.3f, 3.0f}, stepping batch size from 1 by 23 while at most 120.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X24, alpha) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x24);
  const float alpha_values[] = {0.3f, 3.0f};
  for (const float alpha : alpha_values) {
    for (size_t n = 1; n <= 120; n += 23) {
      VUnOpMicrokernelTester()
        .batch_size(n)
        .alpha(alpha)
        .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
7392
// Runs the ELU tester for each beta in {0.3f, 3.0f}, stepping batch size from 1 by 23 while at most 120.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X24, beta) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x24);
  const float beta_values[] = {0.3f, 3.0f};
  for (const float beta : beta_values) {
    for (size_t n = 1; n <= 120; n += 23) {
      VUnOpMicrokernelTester()
        .batch_size(n)
        .beta(beta)
        .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
7404 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
7405
7406
7407 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the ELU tester once with a batch size of 32.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X32, batch_eq_32) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x32);
  VUnOpMicrokernelTester()
    .batch_size(32)
    .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
}
7414
// Runs the ELU tester for batch sizes that are multiples 2 through 9 of 32.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X32, batch_div_32) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x32);
  for (size_t multiple = 2; multiple < 10; ++multiple) {
    VUnOpMicrokernelTester()
      .batch_size(multiple * 32)
      .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
7423
// Runs the ELU tester for every batch size from 1 through 31.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X32, batch_lt_32) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x32);
  for (size_t n = 1; n <= 31; ++n) {
    VUnOpMicrokernelTester()
      .batch_size(n)
      .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
7432
// Runs the ELU tester for every batch size from 33 through 63.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X32, batch_gt_32) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x32);
  for (size_t extra = 1; extra < 32; ++extra) {
    VUnOpMicrokernelTester()
      .batch_size(32 + extra)
      .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
7441
// Runs the ELU tester with inplace(true), stepping batch size from 1 by 31 while at most 160.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X32, inplace) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x32);
  size_t n = 1;
  while (n <= 160) {
    VUnOpMicrokernelTester()
      .batch_size(n)
      .inplace(true)
      .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    n += 31;
  }
}
7451
// Runs the ELU tester for each prescale in {0.1f, 10.0f}, stepping batch size from 1 by 31 while at most 160.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X32, prescale) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x32);
  const float prescale_values[] = {0.1f, 10.0f};
  for (const float prescale : prescale_values) {
    for (size_t n = 1; n <= 160; n += 31) {
      VUnOpMicrokernelTester()
        .batch_size(n)
        .prescale(prescale)
        .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
7463
// Runs the ELU tester for each alpha in {0.3f, 3.0f}, stepping batch size from 1 by 31 while at most 160.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X32, alpha) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x32);
  const float alpha_values[] = {0.3f, 3.0f};
  for (const float alpha : alpha_values) {
    for (size_t n = 1; n <= 160; n += 31) {
      VUnOpMicrokernelTester()
        .batch_size(n)
        .alpha(alpha)
        .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
7475
// Runs the ELU tester for each beta in {0.3f, 3.0f}, stepping batch size from 1 by 31 while at most 160.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X32, beta) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x32);
  const float beta_values[] = {0.3f, 3.0f};
  for (const float beta : beta_values) {
    for (size_t n = 1; n <= 160; n += 31) {
      VUnOpMicrokernelTester()
        .batch_size(n)
        .beta(beta)
        .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
7487 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
7488
7489
7490 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the ELU tester once with a batch size of 40.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X40, batch_eq_40) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x40);
  VUnOpMicrokernelTester()
    .batch_size(40)
    .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
}
7497
// Runs the ELU tester for batch sizes that are multiples 2 through 9 of 40.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X40, batch_div_40) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x40);
  for (size_t multiple = 2; multiple < 10; ++multiple) {
    VUnOpMicrokernelTester()
      .batch_size(multiple * 40)
      .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
7506
// Runs the ELU tester for every batch size from 1 through 39.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X40, batch_lt_40) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x40);
  for (size_t n = 1; n <= 39; ++n) {
    VUnOpMicrokernelTester()
      .batch_size(n)
      .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
7515
// Runs the ELU tester for every batch size from 41 through 79.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X40, batch_gt_40) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x40);
  for (size_t extra = 1; extra < 40; ++extra) {
    VUnOpMicrokernelTester()
      .batch_size(40 + extra)
      .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
7524
// Runs the ELU tester with inplace(true), stepping batch size from 1 by 39 while at most 200.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X40, inplace) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x40);
  size_t n = 1;
  while (n <= 200) {
    VUnOpMicrokernelTester()
      .batch_size(n)
      .inplace(true)
      .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    n += 39;
  }
}
7534
// Runs the ELU tester for each prescale in {0.1f, 10.0f}, stepping batch size from 1 by 39 while at most 200.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X40, prescale) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x40);
  const float prescale_values[] = {0.1f, 10.0f};
  for (const float prescale : prescale_values) {
    for (size_t n = 1; n <= 200; n += 39) {
      VUnOpMicrokernelTester()
        .batch_size(n)
        .prescale(prescale)
        .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
7546
// Runs the ELU tester for each alpha in {0.3f, 3.0f}, stepping batch size from 1 by 39 while at most 200.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X40, alpha) {
  TEST_REQUIRES_X86_AVX2;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x40);
  const float alpha_values[] = {0.3f, 3.0f};
  for (const float alpha : alpha_values) {
    for (size_t n = 1; n <= 200; n += 39) {
      VUnOpMicrokernelTester()
        .batch_size(n)
        .alpha(alpha)
        .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
7558
// Runs the ELU tester on the avx2_rr1_lut16_p3_gather_x40 kernel with beta 0.3 and 3,
// each over batch sizes from 1 up to 200 in steps of 39.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X40, beta) {
  TEST_REQUIRES_X86_AVX2;
  using Tester = VUnOpMicrokernelTester;
  const auto ukernel =
      xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x40);
  const float kBetaValues[] = {0.3f, 3.0f};
  for (const float value : kBetaValues) {
    for (size_t n = 1; n <= 200; n += 39) {
      Tester().batch_size(n).beta(value).Test(ukernel, Tester::OpType::ELU);
    }
  }
}
7570 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
7571
7572
7573 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the ELU tester on the avx2_rr1_lut16_p3_gather_x48 kernel with a batch size of exactly 48.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X48, batch_eq_48) {
  TEST_REQUIRES_X86_AVX2;
  using Tester = VUnOpMicrokernelTester;
  const auto ukernel =
      xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x48);
  Tester().batch_size(48).Test(ukernel, Tester::OpType::ELU);
}
7580
// Runs the ELU tester on the avx2_rr1_lut16_p3_gather_x48 kernel for each multiple of 48 in [96, 480).
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X48, batch_div_48) {
  TEST_REQUIRES_X86_AVX2;
  using Tester = VUnOpMicrokernelTester;
  const auto ukernel =
      xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x48);
  for (size_t n = 96; n < 480; n += 48) {
    Tester().batch_size(n).Test(ukernel, Tester::OpType::ELU);
  }
}
7589
// Runs the ELU tester on the avx2_rr1_lut16_p3_gather_x48 kernel for every batch size in [1, 48).
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X48, batch_lt_48) {
  TEST_REQUIRES_X86_AVX2;
  using Tester = VUnOpMicrokernelTester;
  const auto ukernel =
      xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x48);
  for (size_t n = 1; n < 48; ++n) {
    Tester().batch_size(n).Test(ukernel, Tester::OpType::ELU);
  }
}
7598
// Runs the ELU tester on the avx2_rr1_lut16_p3_gather_x48 kernel for every batch size in (48, 96).
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X48, batch_gt_48) {
  TEST_REQUIRES_X86_AVX2;
  using Tester = VUnOpMicrokernelTester;
  const auto ukernel =
      xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x48);
  for (size_t n = 49; n < 96; ++n) {
    Tester().batch_size(n).Test(ukernel, Tester::OpType::ELU);
  }
}
7607
// Runs the ELU tester with inplace(true) on the avx2_rr1_lut16_p3_gather_x48 kernel
// for batch sizes from 1 up to 240 in steps of 47.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X48, inplace) {
  TEST_REQUIRES_X86_AVX2;
  using Tester = VUnOpMicrokernelTester;
  const auto ukernel =
      xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x48);
  for (size_t n = 1; n <= 240; n += 47) {
    Tester().batch_size(n).inplace(true).Test(ukernel, Tester::OpType::ELU);
  }
}
7617
// Runs the ELU tester on the avx2_rr1_lut16_p3_gather_x48 kernel with prescale 0.1 and 10,
// each over batch sizes from 1 up to 240 in steps of 47.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X48, prescale) {
  TEST_REQUIRES_X86_AVX2;
  using Tester = VUnOpMicrokernelTester;
  const auto ukernel =
      xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x48);
  const float kPrescaleValues[] = {0.1f, 10.0f};
  for (const float value : kPrescaleValues) {
    for (size_t n = 1; n <= 240; n += 47) {
      Tester().batch_size(n).prescale(value).Test(ukernel, Tester::OpType::ELU);
    }
  }
}
7629
// Runs the ELU tester on the avx2_rr1_lut16_p3_gather_x48 kernel with alpha 0.3 and 3,
// each over batch sizes from 1 up to 240 in steps of 47.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X48, alpha) {
  TEST_REQUIRES_X86_AVX2;
  using Tester = VUnOpMicrokernelTester;
  const auto ukernel =
      xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x48);
  const float kAlphaValues[] = {0.3f, 3.0f};
  for (const float value : kAlphaValues) {
    for (size_t n = 1; n <= 240; n += 47) {
      Tester().batch_size(n).alpha(value).Test(ukernel, Tester::OpType::ELU);
    }
  }
}
7641
// Runs the ELU tester on the avx2_rr1_lut16_p3_gather_x48 kernel with beta 0.3 and 3,
// each over batch sizes from 1 up to 240 in steps of 47.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X48, beta) {
  TEST_REQUIRES_X86_AVX2;
  using Tester = VUnOpMicrokernelTester;
  const auto ukernel =
      xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x48);
  const float kBetaValues[] = {0.3f, 3.0f};
  for (const float value : kBetaValues) {
    for (size_t n = 1; n <= 240; n += 47) {
      Tester().batch_size(n).beta(value).Test(ukernel, Tester::OpType::ELU);
    }
  }
}
7653 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
7654
7655
7656 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the ELU tester on the avx2_rr1_lut16_p3_gather_x56 kernel with a batch size of exactly 56.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X56, batch_eq_56) {
  TEST_REQUIRES_X86_AVX2;
  using Tester = VUnOpMicrokernelTester;
  const auto ukernel =
      xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x56);
  Tester().batch_size(56).Test(ukernel, Tester::OpType::ELU);
}
7663
// Runs the ELU tester on the avx2_rr1_lut16_p3_gather_x56 kernel for each multiple of 56 in [112, 560).
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X56, batch_div_56) {
  TEST_REQUIRES_X86_AVX2;
  using Tester = VUnOpMicrokernelTester;
  const auto ukernel =
      xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x56);
  for (size_t n = 112; n < 560; n += 56) {
    Tester().batch_size(n).Test(ukernel, Tester::OpType::ELU);
  }
}
7672
// Runs the ELU tester on the avx2_rr1_lut16_p3_gather_x56 kernel for every batch size in [1, 56).
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X56, batch_lt_56) {
  TEST_REQUIRES_X86_AVX2;
  using Tester = VUnOpMicrokernelTester;
  const auto ukernel =
      xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x56);
  for (size_t n = 1; n < 56; ++n) {
    Tester().batch_size(n).Test(ukernel, Tester::OpType::ELU);
  }
}
7681
// Runs the ELU tester on the avx2_rr1_lut16_p3_gather_x56 kernel for every batch size in (56, 112).
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X56, batch_gt_56) {
  TEST_REQUIRES_X86_AVX2;
  using Tester = VUnOpMicrokernelTester;
  const auto ukernel =
      xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x56);
  for (size_t n = 57; n < 112; ++n) {
    Tester().batch_size(n).Test(ukernel, Tester::OpType::ELU);
  }
}
7690
// Runs the ELU tester with inplace(true) on the avx2_rr1_lut16_p3_gather_x56 kernel
// for batch sizes from 1 up to 280 in steps of 55.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X56, inplace) {
  TEST_REQUIRES_X86_AVX2;
  using Tester = VUnOpMicrokernelTester;
  const auto ukernel =
      xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x56);
  for (size_t n = 1; n <= 280; n += 55) {
    Tester().batch_size(n).inplace(true).Test(ukernel, Tester::OpType::ELU);
  }
}
7700
// Runs the ELU tester on the avx2_rr1_lut16_p3_gather_x56 kernel with prescale 0.1 and 10,
// each over batch sizes from 1 up to 280 in steps of 55.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X56, prescale) {
  TEST_REQUIRES_X86_AVX2;
  using Tester = VUnOpMicrokernelTester;
  const auto ukernel =
      xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x56);
  const float kPrescaleValues[] = {0.1f, 10.0f};
  for (const float value : kPrescaleValues) {
    for (size_t n = 1; n <= 280; n += 55) {
      Tester().batch_size(n).prescale(value).Test(ukernel, Tester::OpType::ELU);
    }
  }
}
7712
// Runs the ELU tester on the avx2_rr1_lut16_p3_gather_x56 kernel with alpha 0.3 and 3,
// each over batch sizes from 1 up to 280 in steps of 55.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X56, alpha) {
  TEST_REQUIRES_X86_AVX2;
  using Tester = VUnOpMicrokernelTester;
  const auto ukernel =
      xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x56);
  const float kAlphaValues[] = {0.3f, 3.0f};
  for (const float value : kAlphaValues) {
    for (size_t n = 1; n <= 280; n += 55) {
      Tester().batch_size(n).alpha(value).Test(ukernel, Tester::OpType::ELU);
    }
  }
}
7724
// Runs the ELU tester on the avx2_rr1_lut16_p3_gather_x56 kernel with beta 0.3 and 3,
// each over batch sizes from 1 up to 280 in steps of 55.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X56, beta) {
  TEST_REQUIRES_X86_AVX2;
  using Tester = VUnOpMicrokernelTester;
  const auto ukernel =
      xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x56);
  const float kBetaValues[] = {0.3f, 3.0f};
  for (const float value : kBetaValues) {
    for (size_t n = 1; n <= 280; n += 55) {
      Tester().batch_size(n).beta(value).Test(ukernel, Tester::OpType::ELU);
    }
  }
}
7736 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
7737
7738
7739 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the ELU tester on the avx2_rr1_lut16_p3_gather_x64 kernel with a batch size of exactly 64.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X64, batch_eq_64) {
  TEST_REQUIRES_X86_AVX2;
  using Tester = VUnOpMicrokernelTester;
  const auto ukernel =
      xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x64);
  Tester().batch_size(64).Test(ukernel, Tester::OpType::ELU);
}
7746
// Runs the ELU tester on the avx2_rr1_lut16_p3_gather_x64 kernel for each multiple of 64 in [128, 640).
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X64, batch_div_64) {
  TEST_REQUIRES_X86_AVX2;
  using Tester = VUnOpMicrokernelTester;
  const auto ukernel =
      xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x64);
  for (size_t n = 128; n < 640; n += 64) {
    Tester().batch_size(n).Test(ukernel, Tester::OpType::ELU);
  }
}
7755
// Runs the ELU tester on the avx2_rr1_lut16_p3_gather_x64 kernel for every batch size in [1, 64).
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X64, batch_lt_64) {
  TEST_REQUIRES_X86_AVX2;
  using Tester = VUnOpMicrokernelTester;
  const auto ukernel =
      xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x64);
  for (size_t n = 1; n < 64; ++n) {
    Tester().batch_size(n).Test(ukernel, Tester::OpType::ELU);
  }
}
7764
// Runs the ELU tester on the avx2_rr1_lut16_p3_gather_x64 kernel for every batch size in (64, 128).
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X64, batch_gt_64) {
  TEST_REQUIRES_X86_AVX2;
  using Tester = VUnOpMicrokernelTester;
  const auto ukernel =
      xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x64);
  for (size_t n = 65; n < 128; ++n) {
    Tester().batch_size(n).Test(ukernel, Tester::OpType::ELU);
  }
}
7773
// Runs the ELU tester with inplace(true) on the avx2_rr1_lut16_p3_gather_x64 kernel
// for batch sizes from 1 up to 320 in steps of 63.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X64, inplace) {
  TEST_REQUIRES_X86_AVX2;
  using Tester = VUnOpMicrokernelTester;
  const auto ukernel =
      xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x64);
  for (size_t n = 1; n <= 320; n += 63) {
    Tester().batch_size(n).inplace(true).Test(ukernel, Tester::OpType::ELU);
  }
}
7783
// Runs the ELU tester on the avx2_rr1_lut16_p3_gather_x64 kernel with prescale 0.1 and 10,
// each over batch sizes from 1 up to 320 in steps of 63.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X64, prescale) {
  TEST_REQUIRES_X86_AVX2;
  using Tester = VUnOpMicrokernelTester;
  const auto ukernel =
      xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x64);
  const float kPrescaleValues[] = {0.1f, 10.0f};
  for (const float value : kPrescaleValues) {
    for (size_t n = 1; n <= 320; n += 63) {
      Tester().batch_size(n).prescale(value).Test(ukernel, Tester::OpType::ELU);
    }
  }
}
7795
// Runs the ELU tester on the avx2_rr1_lut16_p3_gather_x64 kernel with alpha 0.3 and 3,
// each over batch sizes from 1 up to 320 in steps of 63.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X64, alpha) {
  TEST_REQUIRES_X86_AVX2;
  using Tester = VUnOpMicrokernelTester;
  const auto ukernel =
      xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x64);
  const float kAlphaValues[] = {0.3f, 3.0f};
  for (const float value : kAlphaValues) {
    for (size_t n = 1; n <= 320; n += 63) {
      Tester().batch_size(n).alpha(value).Test(ukernel, Tester::OpType::ELU);
    }
  }
}
7807
// Runs the ELU tester on the avx2_rr1_lut16_p3_gather_x64 kernel with beta 0.3 and 3,
// each over batch sizes from 1 up to 320 in steps of 63.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X64, beta) {
  TEST_REQUIRES_X86_AVX2;
  using Tester = VUnOpMicrokernelTester;
  const auto ukernel =
      xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x64);
  const float kBetaValues[] = {0.3f, 3.0f};
  for (const float value : kBetaValues) {
    for (size_t n = 1; n <= 320; n += 63) {
      Tester().batch_size(n).beta(value).Test(ukernel, Tester::OpType::ELU);
    }
  }
}
7819 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
7820
7821
7822 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the ELU tester on the avx2_rr1_lut16_p3_gather_x72 kernel with a batch size of exactly 72.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X72, batch_eq_72) {
  TEST_REQUIRES_X86_AVX2;
  using Tester = VUnOpMicrokernelTester;
  const auto ukernel =
      xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x72);
  Tester().batch_size(72).Test(ukernel, Tester::OpType::ELU);
}
7829
// Runs the ELU tester on the avx2_rr1_lut16_p3_gather_x72 kernel for each multiple of 72 in [144, 720).
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X72, batch_div_72) {
  TEST_REQUIRES_X86_AVX2;
  using Tester = VUnOpMicrokernelTester;
  const auto ukernel =
      xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x72);
  for (size_t n = 144; n < 720; n += 72) {
    Tester().batch_size(n).Test(ukernel, Tester::OpType::ELU);
  }
}
7838
// Runs the ELU tester on the avx2_rr1_lut16_p3_gather_x72 kernel for every batch size in [1, 72).
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X72, batch_lt_72) {
  TEST_REQUIRES_X86_AVX2;
  using Tester = VUnOpMicrokernelTester;
  const auto ukernel =
      xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x72);
  for (size_t n = 1; n < 72; ++n) {
    Tester().batch_size(n).Test(ukernel, Tester::OpType::ELU);
  }
}
7847
// Runs the ELU tester on the avx2_rr1_lut16_p3_gather_x72 kernel for every batch size in (72, 144).
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X72, batch_gt_72) {
  TEST_REQUIRES_X86_AVX2;
  using Tester = VUnOpMicrokernelTester;
  const auto ukernel =
      xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x72);
  for (size_t n = 73; n < 144; ++n) {
    Tester().batch_size(n).Test(ukernel, Tester::OpType::ELU);
  }
}
7856
// Runs the ELU tester with inplace(true) on the avx2_rr1_lut16_p3_gather_x72 kernel
// for batch sizes from 1 up to 360 in steps of 71.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X72, inplace) {
  TEST_REQUIRES_X86_AVX2;
  using Tester = VUnOpMicrokernelTester;
  const auto ukernel =
      xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x72);
  for (size_t n = 1; n <= 360; n += 71) {
    Tester().batch_size(n).inplace(true).Test(ukernel, Tester::OpType::ELU);
  }
}
7866
// Runs the ELU tester on the avx2_rr1_lut16_p3_gather_x72 kernel with prescale 0.1 and 10,
// each over batch sizes from 1 up to 360 in steps of 71.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X72, prescale) {
  TEST_REQUIRES_X86_AVX2;
  using Tester = VUnOpMicrokernelTester;
  const auto ukernel =
      xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x72);
  const float kPrescaleValues[] = {0.1f, 10.0f};
  for (const float value : kPrescaleValues) {
    for (size_t n = 1; n <= 360; n += 71) {
      Tester().batch_size(n).prescale(value).Test(ukernel, Tester::OpType::ELU);
    }
  }
}
7878
// Runs the ELU tester on the avx2_rr1_lut16_p3_gather_x72 kernel with alpha 0.3 and 3,
// each over batch sizes from 1 up to 360 in steps of 71.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X72, alpha) {
  TEST_REQUIRES_X86_AVX2;
  using Tester = VUnOpMicrokernelTester;
  const auto ukernel =
      xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x72);
  const float kAlphaValues[] = {0.3f, 3.0f};
  for (const float value : kAlphaValues) {
    for (size_t n = 1; n <= 360; n += 71) {
      Tester().batch_size(n).alpha(value).Test(ukernel, Tester::OpType::ELU);
    }
  }
}
7890
// Runs the ELU tester on the avx2_rr1_lut16_p3_gather_x72 kernel with beta 0.3 and 3,
// each over batch sizes from 1 up to 360 in steps of 71.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X72, beta) {
  TEST_REQUIRES_X86_AVX2;
  using Tester = VUnOpMicrokernelTester;
  const auto ukernel =
      xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x72);
  const float kBetaValues[] = {0.3f, 3.0f};
  for (const float value : kBetaValues) {
    for (size_t n = 1; n <= 360; n += 71) {
      Tester().batch_size(n).beta(value).Test(ukernel, Tester::OpType::ELU);
    }
  }
}
7902 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
7903
7904
7905 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the ELU tester on the avx2_rr1_lut16_p3_gather_x80 kernel with a batch size of exactly 80.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X80, batch_eq_80) {
  TEST_REQUIRES_X86_AVX2;
  using Tester = VUnOpMicrokernelTester;
  const auto ukernel =
      xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x80);
  Tester().batch_size(80).Test(ukernel, Tester::OpType::ELU);
}
7912
// Runs the ELU tester on the avx2_rr1_lut16_p3_gather_x80 kernel for each multiple of 80 in [160, 800).
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X80, batch_div_80) {
  TEST_REQUIRES_X86_AVX2;
  using Tester = VUnOpMicrokernelTester;
  const auto ukernel =
      xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x80);
  for (size_t n = 160; n < 800; n += 80) {
    Tester().batch_size(n).Test(ukernel, Tester::OpType::ELU);
  }
}
7921
// Runs the ELU tester on the avx2_rr1_lut16_p3_gather_x80 kernel for every batch size in [1, 80).
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X80, batch_lt_80) {
  TEST_REQUIRES_X86_AVX2;
  using Tester = VUnOpMicrokernelTester;
  const auto ukernel =
      xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x80);
  for (size_t n = 1; n < 80; ++n) {
    Tester().batch_size(n).Test(ukernel, Tester::OpType::ELU);
  }
}
7930
// Runs the ELU tester on the avx2_rr1_lut16_p3_gather_x80 kernel for every batch size in (80, 160).
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X80, batch_gt_80) {
  TEST_REQUIRES_X86_AVX2;
  using Tester = VUnOpMicrokernelTester;
  const auto ukernel =
      xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x80);
  for (size_t n = 81; n < 160; ++n) {
    Tester().batch_size(n).Test(ukernel, Tester::OpType::ELU);
  }
}
7939
// Runs the ELU tester with inplace(true) on the avx2_rr1_lut16_p3_gather_x80 kernel
// for batch sizes from 1 up to 400 in steps of 79.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X80, inplace) {
  TEST_REQUIRES_X86_AVX2;
  using Tester = VUnOpMicrokernelTester;
  const auto ukernel =
      xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x80);
  for (size_t n = 1; n <= 400; n += 79) {
    Tester().batch_size(n).inplace(true).Test(ukernel, Tester::OpType::ELU);
  }
}
7949
// Runs the ELU tester on the avx2_rr1_lut16_p3_gather_x80 kernel with prescale 0.1 and 10,
// each over batch sizes from 1 up to 400 in steps of 79.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X80, prescale) {
  TEST_REQUIRES_X86_AVX2;
  using Tester = VUnOpMicrokernelTester;
  const auto ukernel =
      xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x80);
  const float kPrescaleValues[] = {0.1f, 10.0f};
  for (const float value : kPrescaleValues) {
    for (size_t n = 1; n <= 400; n += 79) {
      Tester().batch_size(n).prescale(value).Test(ukernel, Tester::OpType::ELU);
    }
  }
}
7961
// Runs the ELU tester on the avx2_rr1_lut16_p3_gather_x80 kernel with alpha 0.3 and 3,
// each over batch sizes from 1 up to 400 in steps of 79.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X80, alpha) {
  TEST_REQUIRES_X86_AVX2;
  using Tester = VUnOpMicrokernelTester;
  const auto ukernel =
      xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x80);
  const float kAlphaValues[] = {0.3f, 3.0f};
  for (const float value : kAlphaValues) {
    for (size_t n = 1; n <= 400; n += 79) {
      Tester().batch_size(n).alpha(value).Test(ukernel, Tester::OpType::ELU);
    }
  }
}
7973
// Runs the ELU tester on the avx2_rr1_lut16_p3_gather_x80 kernel with beta 0.3 and 3,
// each over batch sizes from 1 up to 400 in steps of 79.
TEST(F32_VELU__AVX2_RR1_LUT16_P3_GATHER_X80, beta) {
  TEST_REQUIRES_X86_AVX2;
  using Tester = VUnOpMicrokernelTester;
  const auto ukernel =
      xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x80);
  const float kBetaValues[] = {0.3f, 3.0f};
  for (const float value : kBetaValues) {
    for (size_t n = 1; n <= 400; n += 79) {
      Tester().batch_size(n).beta(value).Test(ukernel, Tester::OpType::ELU);
    }
  }
}
7985 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
7986
7987
7988 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the ELU tester on the avx2_rr1_p6_x8 kernel with a batch size of exactly 8.
TEST(F32_VELU__AVX2_RR1_P6_X8, batch_eq_8) {
  TEST_REQUIRES_X86_AVX2;
  using Tester = VUnOpMicrokernelTester;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_p6_x8);
  Tester().batch_size(8).Test(ukernel, Tester::OpType::ELU);
}
7995
// Runs the ELU tester on the avx2_rr1_p6_x8 kernel for each multiple of 8 in [16, 80).
TEST(F32_VELU__AVX2_RR1_P6_X8, batch_div_8) {
  TEST_REQUIRES_X86_AVX2;
  using Tester = VUnOpMicrokernelTester;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_p6_x8);
  for (size_t n = 16; n < 80; n += 8) {
    Tester().batch_size(n).Test(ukernel, Tester::OpType::ELU);
  }
}
8004
// Runs the ELU tester on the avx2_rr1_p6_x8 kernel for every batch size in [1, 8).
TEST(F32_VELU__AVX2_RR1_P6_X8, batch_lt_8) {
  TEST_REQUIRES_X86_AVX2;
  using Tester = VUnOpMicrokernelTester;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_p6_x8);
  for (size_t n = 1; n < 8; ++n) {
    Tester().batch_size(n).Test(ukernel, Tester::OpType::ELU);
  }
}
8013
// Runs the ELU tester on the avx2_rr1_p6_x8 kernel for every batch size in (8, 16).
TEST(F32_VELU__AVX2_RR1_P6_X8, batch_gt_8) {
  TEST_REQUIRES_X86_AVX2;
  using Tester = VUnOpMicrokernelTester;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_p6_x8);
  for (size_t n = 9; n < 16; ++n) {
    Tester().batch_size(n).Test(ukernel, Tester::OpType::ELU);
  }
}
8022
// Runs the ELU tester with inplace(true) on the avx2_rr1_p6_x8 kernel
// for batch sizes from 1 up to 40 in steps of 7.
TEST(F32_VELU__AVX2_RR1_P6_X8, inplace) {
  TEST_REQUIRES_X86_AVX2;
  using Tester = VUnOpMicrokernelTester;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_p6_x8);
  for (size_t n = 1; n <= 40; n += 7) {
    Tester().batch_size(n).inplace(true).Test(ukernel, Tester::OpType::ELU);
  }
}
8032
// Runs the ELU tester on the avx2_rr1_p6_x8 kernel with prescale 0.1 and 10,
// each over batch sizes from 1 up to 40 in steps of 7.
TEST(F32_VELU__AVX2_RR1_P6_X8, prescale) {
  TEST_REQUIRES_X86_AVX2;
  using Tester = VUnOpMicrokernelTester;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_p6_x8);
  const float kPrescaleValues[] = {0.1f, 10.0f};
  for (const float value : kPrescaleValues) {
    for (size_t n = 1; n <= 40; n += 7) {
      Tester().batch_size(n).prescale(value).Test(ukernel, Tester::OpType::ELU);
    }
  }
}
8044
// Runs the ELU tester on the avx2_rr1_p6_x8 kernel with alpha 0.3 and 3,
// each over batch sizes from 1 up to 40 in steps of 7.
TEST(F32_VELU__AVX2_RR1_P6_X8, alpha) {
  TEST_REQUIRES_X86_AVX2;
  using Tester = VUnOpMicrokernelTester;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_p6_x8);
  const float kAlphaValues[] = {0.3f, 3.0f};
  for (const float value : kAlphaValues) {
    for (size_t n = 1; n <= 40; n += 7) {
      Tester().batch_size(n).alpha(value).Test(ukernel, Tester::OpType::ELU);
    }
  }
}
8056
// Runs the ELU tester on the avx2_rr1_p6_x8 kernel with beta 0.3 and 3,
// each over batch sizes from 1 up to 40 in steps of 7.
TEST(F32_VELU__AVX2_RR1_P6_X8, beta) {
  TEST_REQUIRES_X86_AVX2;
  using Tester = VUnOpMicrokernelTester;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_p6_x8);
  const float kBetaValues[] = {0.3f, 3.0f};
  for (const float value : kBetaValues) {
    for (size_t n = 1; n <= 40; n += 7) {
      Tester().batch_size(n).beta(value).Test(ukernel, Tester::OpType::ELU);
    }
  }
}
8068 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
8069
8070
8071 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the ELU tester on the avx2_rr1_p6_x16 kernel with a batch size of exactly 16.
TEST(F32_VELU__AVX2_RR1_P6_X16, batch_eq_16) {
  TEST_REQUIRES_X86_AVX2;
  using Tester = VUnOpMicrokernelTester;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_p6_x16);
  Tester().batch_size(16).Test(ukernel, Tester::OpType::ELU);
}
8078
// Runs the ELU tester on the avx2_rr1_p6_x16 kernel for each multiple of 16 in [32, 160).
TEST(F32_VELU__AVX2_RR1_P6_X16, batch_div_16) {
  TEST_REQUIRES_X86_AVX2;
  using Tester = VUnOpMicrokernelTester;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_p6_x16);
  for (size_t n = 32; n < 160; n += 16) {
    Tester().batch_size(n).Test(ukernel, Tester::OpType::ELU);
  }
}
8087
// Runs the ELU tester on the avx2_rr1_p6_x16 kernel for every batch size in [1, 16).
TEST(F32_VELU__AVX2_RR1_P6_X16, batch_lt_16) {
  TEST_REQUIRES_X86_AVX2;
  using Tester = VUnOpMicrokernelTester;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_p6_x16);
  for (size_t n = 1; n < 16; ++n) {
    Tester().batch_size(n).Test(ukernel, Tester::OpType::ELU);
  }
}
8096
// Runs the ELU tester on the avx2_rr1_p6_x16 kernel for every batch size in (16, 32).
TEST(F32_VELU__AVX2_RR1_P6_X16, batch_gt_16) {
  TEST_REQUIRES_X86_AVX2;
  using Tester = VUnOpMicrokernelTester;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_p6_x16);
  for (size_t n = 17; n < 32; ++n) {
    Tester().batch_size(n).Test(ukernel, Tester::OpType::ELU);
  }
}
8105
// Runs the ELU tester with inplace(true) on the avx2_rr1_p6_x16 kernel
// for batch sizes from 1 up to 80 in steps of 15.
TEST(F32_VELU__AVX2_RR1_P6_X16, inplace) {
  TEST_REQUIRES_X86_AVX2;
  using Tester = VUnOpMicrokernelTester;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_p6_x16);
  for (size_t n = 1; n <= 80; n += 15) {
    Tester().batch_size(n).inplace(true).Test(ukernel, Tester::OpType::ELU);
  }
}
8115
// Runs the ELU tester on the avx2_rr1_p6_x16 kernel with prescale 0.1 and 10,
// each over batch sizes from 1 up to 80 in steps of 15.
TEST(F32_VELU__AVX2_RR1_P6_X16, prescale) {
  TEST_REQUIRES_X86_AVX2;
  using Tester = VUnOpMicrokernelTester;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_p6_x16);
  const float kPrescaleValues[] = {0.1f, 10.0f};
  for (const float value : kPrescaleValues) {
    for (size_t n = 1; n <= 80; n += 15) {
      Tester().batch_size(n).prescale(value).Test(ukernel, Tester::OpType::ELU);
    }
  }
}
8127
// Runs the ELU tester on the avx2_rr1_p6_x16 kernel with alpha 0.3 and 3,
// each over batch sizes from 1 up to 80 in steps of 15.
TEST(F32_VELU__AVX2_RR1_P6_X16, alpha) {
  TEST_REQUIRES_X86_AVX2;
  using Tester = VUnOpMicrokernelTester;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_p6_x16);
  const float kAlphaValues[] = {0.3f, 3.0f};
  for (const float value : kAlphaValues) {
    for (size_t n = 1; n <= 80; n += 15) {
      Tester().batch_size(n).alpha(value).Test(ukernel, Tester::OpType::ELU);
    }
  }
}
8139
// Runs the ELU tester on the avx2_rr1_p6_x16 kernel with beta 0.3 and 3,
// each over batch sizes from 1 up to 80 in steps of 15.
TEST(F32_VELU__AVX2_RR1_P6_X16, beta) {
  TEST_REQUIRES_X86_AVX2;
  using Tester = VUnOpMicrokernelTester;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_p6_x16);
  const float kBetaValues[] = {0.3f, 3.0f};
  for (const float value : kBetaValues) {
    for (size_t n = 1; n <= 80; n += 15) {
      Tester().batch_size(n).beta(value).Test(ukernel, Tester::OpType::ELU);
    }
  }
}
8151 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
8152
8153
8154 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the ELU tester on the avx2_rr1_p6_x24 kernel with a batch size of exactly 24.
TEST(F32_VELU__AVX2_RR1_P6_X24, batch_eq_24) {
  TEST_REQUIRES_X86_AVX2;
  using Tester = VUnOpMicrokernelTester;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_p6_x24);
  Tester().batch_size(24).Test(ukernel, Tester::OpType::ELU);
}
8161
// Runs the ELU tester on the avx2_rr1_p6_x24 kernel for each multiple of 24 in [48, 240).
TEST(F32_VELU__AVX2_RR1_P6_X24, batch_div_24) {
  TEST_REQUIRES_X86_AVX2;
  using Tester = VUnOpMicrokernelTester;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_p6_x24);
  for (size_t n = 48; n < 240; n += 24) {
    Tester().batch_size(n).Test(ukernel, Tester::OpType::ELU);
  }
}
8170
// Runs the ELU tester on the avx2_rr1_p6_x24 kernel for every batch size in [1, 24).
TEST(F32_VELU__AVX2_RR1_P6_X24, batch_lt_24) {
  TEST_REQUIRES_X86_AVX2;
  using Tester = VUnOpMicrokernelTester;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_p6_x24);
  for (size_t n = 1; n < 24; ++n) {
    Tester().batch_size(n).Test(ukernel, Tester::OpType::ELU);
  }
}
8179
// Runs the ELU tester on the avx2_rr1_p6_x24 kernel for every batch size in (24, 48).
TEST(F32_VELU__AVX2_RR1_P6_X24, batch_gt_24) {
  TEST_REQUIRES_X86_AVX2;
  using Tester = VUnOpMicrokernelTester;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_p6_x24);
  for (size_t n = 25; n < 48; ++n) {
    Tester().batch_size(n).Test(ukernel, Tester::OpType::ELU);
  }
}
8188
// Runs the ELU tester with inplace(true) on the avx2_rr1_p6_x24 kernel
// for batch sizes from 1 up to 120 in steps of 23.
TEST(F32_VELU__AVX2_RR1_P6_X24, inplace) {
  TEST_REQUIRES_X86_AVX2;
  using Tester = VUnOpMicrokernelTester;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_p6_x24);
  for (size_t n = 1; n <= 120; n += 23) {
    Tester().batch_size(n).inplace(true).Test(ukernel, Tester::OpType::ELU);
  }
}
8198
// Runs the ELU tester on the avx2_rr1_p6_x24 kernel with prescale 0.1 and 10,
// each over batch sizes from 1 up to 120 in steps of 23.
TEST(F32_VELU__AVX2_RR1_P6_X24, prescale) {
  TEST_REQUIRES_X86_AVX2;
  using Tester = VUnOpMicrokernelTester;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_p6_x24);
  const float kPrescaleValues[] = {0.1f, 10.0f};
  for (const float value : kPrescaleValues) {
    for (size_t n = 1; n <= 120; n += 23) {
      Tester().batch_size(n).prescale(value).Test(ukernel, Tester::OpType::ELU);
    }
  }
}
8210
// Runs the ELU tester on the avx2_rr1_p6_x24 kernel with alpha 0.3 and 3,
// each over batch sizes from 1 up to 120 in steps of 23.
TEST(F32_VELU__AVX2_RR1_P6_X24, alpha) {
  TEST_REQUIRES_X86_AVX2;
  using Tester = VUnOpMicrokernelTester;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_p6_x24);
  const float kAlphaValues[] = {0.3f, 3.0f};
  for (const float value : kAlphaValues) {
    for (size_t n = 1; n <= 120; n += 23) {
      Tester().batch_size(n).alpha(value).Test(ukernel, Tester::OpType::ELU);
    }
  }
}
8222
// Runs the ELU tester on the avx2_rr1_p6_x24 kernel with beta 0.3 and 3,
// each over batch sizes from 1 up to 120 in steps of 23.
TEST(F32_VELU__AVX2_RR1_P6_X24, beta) {
  TEST_REQUIRES_X86_AVX2;
  using Tester = VUnOpMicrokernelTester;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_p6_x24);
  const float kBetaValues[] = {0.3f, 3.0f};
  for (const float value : kBetaValues) {
    for (size_t n = 1; n <= 120; n += 23) {
      Tester().batch_size(n).beta(value).Test(ukernel, Tester::OpType::ELU);
    }
  }
}
8234 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
8235
8236
8237 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the ELU tester on the avx2_rr1_p6_x32 kernel with a batch size of exactly 32.
TEST(F32_VELU__AVX2_RR1_P6_X32, batch_eq_32) {
  TEST_REQUIRES_X86_AVX2;
  using Tester = VUnOpMicrokernelTester;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_p6_x32);
  Tester().batch_size(32).Test(ukernel, Tester::OpType::ELU);
}
8244
// Runs the ELU tester on the avx2_rr1_p6_x32 kernel for each multiple of 32 in [64, 320).
TEST(F32_VELU__AVX2_RR1_P6_X32, batch_div_32) {
  TEST_REQUIRES_X86_AVX2;
  using Tester = VUnOpMicrokernelTester;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_p6_x32);
  for (size_t n = 64; n < 320; n += 32) {
    Tester().batch_size(n).Test(ukernel, Tester::OpType::ELU);
  }
}
8253
// Runs the ELU tester on the avx2_rr1_p6_x32 kernel for every batch size in [1, 32).
TEST(F32_VELU__AVX2_RR1_P6_X32, batch_lt_32) {
  TEST_REQUIRES_X86_AVX2;
  using Tester = VUnOpMicrokernelTester;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_p6_x32);
  for (size_t n = 1; n < 32; ++n) {
    Tester().batch_size(n).Test(ukernel, Tester::OpType::ELU);
  }
}
8262
// Runs the ELU tester on the avx2_rr1_p6_x32 kernel for every batch size in (32, 64).
TEST(F32_VELU__AVX2_RR1_P6_X32, batch_gt_32) {
  TEST_REQUIRES_X86_AVX2;
  using Tester = VUnOpMicrokernelTester;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_p6_x32);
  for (size_t n = 33; n < 64; ++n) {
    Tester().batch_size(n).Test(ukernel, Tester::OpType::ELU);
  }
}
8271
// Runs the ELU tester with inplace(true) on the avx2_rr1_p6_x32 kernel
// for batch sizes from 1 up to 160 in steps of 31.
TEST(F32_VELU__AVX2_RR1_P6_X32, inplace) {
  TEST_REQUIRES_X86_AVX2;
  using Tester = VUnOpMicrokernelTester;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_p6_x32);
  for (size_t n = 1; n <= 160; n += 31) {
    Tester().batch_size(n).inplace(true).Test(ukernel, Tester::OpType::ELU);
  }
}
8281
// Runs the ELU tester on the avx2_rr1_p6_x32 kernel with prescale 0.1 and 10,
// each over batch sizes from 1 up to 160 in steps of 31.
TEST(F32_VELU__AVX2_RR1_P6_X32, prescale) {
  TEST_REQUIRES_X86_AVX2;
  using Tester = VUnOpMicrokernelTester;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_p6_x32);
  const float kPrescaleValues[] = {0.1f, 10.0f};
  for (const float value : kPrescaleValues) {
    for (size_t n = 1; n <= 160; n += 31) {
      Tester().batch_size(n).prescale(value).Test(ukernel, Tester::OpType::ELU);
    }
  }
}
8293
// Runs the ELU tester on the avx2_rr1_p6_x32 kernel with alpha 0.3 and 3,
// each over batch sizes from 1 up to 160 in steps of 31.
TEST(F32_VELU__AVX2_RR1_P6_X32, alpha) {
  TEST_REQUIRES_X86_AVX2;
  using Tester = VUnOpMicrokernelTester;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_p6_x32);
  const float kAlphaValues[] = {0.3f, 3.0f};
  for (const float value : kAlphaValues) {
    for (size_t n = 1; n <= 160; n += 31) {
      Tester().batch_size(n).alpha(value).Test(ukernel, Tester::OpType::ELU);
    }
  }
}
8305
TEST(F32_VELU__AVX2_RR1_P6_X32, beta) {
  TEST_REQUIRES_X86_AVX2;
  // For each beta value, sweep batch sizes from 1 in steps of 31 while <= 160.
  constexpr size_t kTile = 32;
  constexpr float kBetas[] = {0.3f, 3.0f};
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_p6_x32);
  for (const float b : kBetas) {
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VUnOpMicrokernelTester().batch_size(n).beta(b).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
8317 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
8318
8319
8320 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(F32_VELU__AVX2_RR1_P6_X40, batch_eq_40) {
  TEST_REQUIRES_X86_AVX2;
  // One ELU run at batch size 40, the size named by this test and the x40 kernel.
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_p6_x40);
  VUnOpMicrokernelTester().batch_size(40).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
}
8327
TEST(F32_VELU__AVX2_RR1_P6_X40, batch_div_40) {
  TEST_REQUIRES_X86_AVX2;
  // Batch sizes that are multiples of 40: 80, 120, ..., 360.
  constexpr size_t kTile = 40;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_p6_x40);
  for (size_t n = 2 * kTile; n < 10 * kTile; n += kTile) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
8336
TEST(F32_VELU__AVX2_RR1_P6_X40, batch_lt_40) {
  TEST_REQUIRES_X86_AVX2;
  // Every batch size from 1 through 39.
  constexpr size_t kTile = 40;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_p6_x40);
  for (size_t n = 1; n < kTile; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
8345
TEST(F32_VELU__AVX2_RR1_P6_X40, batch_gt_40) {
  TEST_REQUIRES_X86_AVX2;
  // Every batch size from 41 through 79.
  constexpr size_t kTile = 40;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_p6_x40);
  for (size_t n = kTile + 1; n < 2 * kTile; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
8354
TEST(F32_VELU__AVX2_RR1_P6_X40, inplace) {
  TEST_REQUIRES_X86_AVX2;
  // In-place runs: batch sizes start at 1 and step by 39 while <= 200.
  constexpr size_t kTile = 40;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_p6_x40);
  for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
    VUnOpMicrokernelTester().batch_size(n).inplace(true).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
8364
TEST(F32_VELU__AVX2_RR1_P6_X40, prescale) {
  TEST_REQUIRES_X86_AVX2;
  // For each prescale value, sweep batch sizes from 1 in steps of 39 while <= 200.
  constexpr size_t kTile = 40;
  constexpr float kPrescales[] = {0.1f, 10.0f};
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_p6_x40);
  for (const float scale : kPrescales) {
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VUnOpMicrokernelTester().batch_size(n).prescale(scale).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
8376
TEST(F32_VELU__AVX2_RR1_P6_X40, alpha) {
  TEST_REQUIRES_X86_AVX2;
  // For each alpha value, sweep batch sizes from 1 in steps of 39 while <= 200.
  constexpr size_t kTile = 40;
  constexpr float kAlphas[] = {0.3f, 3.0f};
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_p6_x40);
  for (const float a : kAlphas) {
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VUnOpMicrokernelTester().batch_size(n).alpha(a).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
8388
TEST(F32_VELU__AVX2_RR1_P6_X40, beta) {
  TEST_REQUIRES_X86_AVX2;
  // For each beta value, sweep batch sizes from 1 in steps of 39 while <= 200.
  constexpr size_t kTile = 40;
  constexpr float kBetas[] = {0.3f, 3.0f};
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_p6_x40);
  for (const float b : kBetas) {
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VUnOpMicrokernelTester().batch_size(n).beta(b).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
8400 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
8401
8402
8403 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(F32_VELU__AVX2_RR1_P6_X48, batch_eq_48) {
  TEST_REQUIRES_X86_AVX2;
  // One ELU run at batch size 48, the size named by this test and the x48 kernel.
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_p6_x48);
  VUnOpMicrokernelTester().batch_size(48).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
}
8410
TEST(F32_VELU__AVX2_RR1_P6_X48, batch_div_48) {
  TEST_REQUIRES_X86_AVX2;
  // Batch sizes that are multiples of 48: 96, 144, ..., 432.
  constexpr size_t kTile = 48;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_p6_x48);
  for (size_t n = 2 * kTile; n < 10 * kTile; n += kTile) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
8419
TEST(F32_VELU__AVX2_RR1_P6_X48, batch_lt_48) {
  TEST_REQUIRES_X86_AVX2;
  // Every batch size from 1 through 47.
  constexpr size_t kTile = 48;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_p6_x48);
  for (size_t n = 1; n < kTile; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
8428
TEST(F32_VELU__AVX2_RR1_P6_X48, batch_gt_48) {
  TEST_REQUIRES_X86_AVX2;
  // Every batch size from 49 through 95.
  constexpr size_t kTile = 48;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_p6_x48);
  for (size_t n = kTile + 1; n < 2 * kTile; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
8437
TEST(F32_VELU__AVX2_RR1_P6_X48, inplace) {
  TEST_REQUIRES_X86_AVX2;
  // In-place runs: batch sizes start at 1 and step by 47 while <= 240.
  constexpr size_t kTile = 48;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_p6_x48);
  for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
    VUnOpMicrokernelTester().batch_size(n).inplace(true).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
8447
TEST(F32_VELU__AVX2_RR1_P6_X48, prescale) {
  TEST_REQUIRES_X86_AVX2;
  // For each prescale value, sweep batch sizes from 1 in steps of 47 while <= 240.
  constexpr size_t kTile = 48;
  constexpr float kPrescales[] = {0.1f, 10.0f};
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_p6_x48);
  for (const float scale : kPrescales) {
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VUnOpMicrokernelTester().batch_size(n).prescale(scale).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
8459
TEST(F32_VELU__AVX2_RR1_P6_X48, alpha) {
  TEST_REQUIRES_X86_AVX2;
  // For each alpha value, sweep batch sizes from 1 in steps of 47 while <= 240.
  constexpr size_t kTile = 48;
  constexpr float kAlphas[] = {0.3f, 3.0f};
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_p6_x48);
  for (const float a : kAlphas) {
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VUnOpMicrokernelTester().batch_size(n).alpha(a).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
8471
TEST(F32_VELU__AVX2_RR1_P6_X48, beta) {
  TEST_REQUIRES_X86_AVX2;
  // For each beta value, sweep batch sizes from 1 in steps of 47 while <= 240.
  constexpr size_t kTile = 48;
  constexpr float kBetas[] = {0.3f, 3.0f};
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_p6_x48);
  for (const float b : kBetas) {
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VUnOpMicrokernelTester().batch_size(n).beta(b).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
8483 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
8484
8485
8486 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(F32_VELU__AVX2_RR1_P6_X56, batch_eq_56) {
  TEST_REQUIRES_X86_AVX2;
  // One ELU run at batch size 56, the size named by this test and the x56 kernel.
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_p6_x56);
  VUnOpMicrokernelTester().batch_size(56).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
}
8493
TEST(F32_VELU__AVX2_RR1_P6_X56, batch_div_56) {
  TEST_REQUIRES_X86_AVX2;
  // Batch sizes that are multiples of 56: 112, 168, ..., 504.
  constexpr size_t kTile = 56;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_p6_x56);
  for (size_t n = 2 * kTile; n < 10 * kTile; n += kTile) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
8502
TEST(F32_VELU__AVX2_RR1_P6_X56, batch_lt_56) {
  TEST_REQUIRES_X86_AVX2;
  // Every batch size from 1 through 55.
  constexpr size_t kTile = 56;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_p6_x56);
  for (size_t n = 1; n < kTile; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
8511
TEST(F32_VELU__AVX2_RR1_P6_X56, batch_gt_56) {
  TEST_REQUIRES_X86_AVX2;
  // Every batch size from 57 through 111.
  constexpr size_t kTile = 56;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_p6_x56);
  for (size_t n = kTile + 1; n < 2 * kTile; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
8520
TEST(F32_VELU__AVX2_RR1_P6_X56, inplace) {
  TEST_REQUIRES_X86_AVX2;
  // In-place runs: batch sizes start at 1 and step by 55 while <= 280.
  constexpr size_t kTile = 56;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_p6_x56);
  for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
    VUnOpMicrokernelTester().batch_size(n).inplace(true).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
8530
TEST(F32_VELU__AVX2_RR1_P6_X56, prescale) {
  TEST_REQUIRES_X86_AVX2;
  // For each prescale value, sweep batch sizes from 1 in steps of 55 while <= 280.
  constexpr size_t kTile = 56;
  constexpr float kPrescales[] = {0.1f, 10.0f};
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_p6_x56);
  for (const float scale : kPrescales) {
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VUnOpMicrokernelTester().batch_size(n).prescale(scale).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
8542
TEST(F32_VELU__AVX2_RR1_P6_X56, alpha) {
  TEST_REQUIRES_X86_AVX2;
  // For each alpha value, sweep batch sizes from 1 in steps of 55 while <= 280.
  constexpr size_t kTile = 56;
  constexpr float kAlphas[] = {0.3f, 3.0f};
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_p6_x56);
  for (const float a : kAlphas) {
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VUnOpMicrokernelTester().batch_size(n).alpha(a).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
8554
TEST(F32_VELU__AVX2_RR1_P6_X56, beta) {
  TEST_REQUIRES_X86_AVX2;
  // For each beta value, sweep batch sizes from 1 in steps of 55 while <= 280.
  constexpr size_t kTile = 56;
  constexpr float kBetas[] = {0.3f, 3.0f};
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_p6_x56);
  for (const float b : kBetas) {
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VUnOpMicrokernelTester().batch_size(n).beta(b).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
8566 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
8567
8568
8569 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(F32_VELU__AVX2_RR1_P6_X64, batch_eq_64) {
  TEST_REQUIRES_X86_AVX2;
  // One ELU run at batch size 64, the size named by this test and the x64 kernel.
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_p6_x64);
  VUnOpMicrokernelTester().batch_size(64).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
}
8576
TEST(F32_VELU__AVX2_RR1_P6_X64, batch_div_64) {
  TEST_REQUIRES_X86_AVX2;
  // Batch sizes that are multiples of 64: 128, 192, ..., 576.
  constexpr size_t kTile = 64;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_p6_x64);
  for (size_t n = 2 * kTile; n < 10 * kTile; n += kTile) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
8585
TEST(F32_VELU__AVX2_RR1_P6_X64, batch_lt_64) {
  TEST_REQUIRES_X86_AVX2;
  // Every batch size from 1 through 63.
  constexpr size_t kTile = 64;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_p6_x64);
  for (size_t n = 1; n < kTile; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
8594
TEST(F32_VELU__AVX2_RR1_P6_X64, batch_gt_64) {
  TEST_REQUIRES_X86_AVX2;
  // Every batch size from 65 through 127.
  constexpr size_t kTile = 64;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_p6_x64);
  for (size_t n = kTile + 1; n < 2 * kTile; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
8603
TEST(F32_VELU__AVX2_RR1_P6_X64, inplace) {
  TEST_REQUIRES_X86_AVX2;
  // In-place runs: batch sizes start at 1 and step by 63 while <= 320.
  constexpr size_t kTile = 64;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_p6_x64);
  for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
    VUnOpMicrokernelTester().batch_size(n).inplace(true).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
8613
TEST(F32_VELU__AVX2_RR1_P6_X64, prescale) {
  TEST_REQUIRES_X86_AVX2;
  // For each prescale value, sweep batch sizes from 1 in steps of 63 while <= 320.
  constexpr size_t kTile = 64;
  constexpr float kPrescales[] = {0.1f, 10.0f};
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_p6_x64);
  for (const float scale : kPrescales) {
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VUnOpMicrokernelTester().batch_size(n).prescale(scale).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
8625
TEST(F32_VELU__AVX2_RR1_P6_X64, alpha) {
  TEST_REQUIRES_X86_AVX2;
  // For each alpha value, sweep batch sizes from 1 in steps of 63 while <= 320.
  constexpr size_t kTile = 64;
  constexpr float kAlphas[] = {0.3f, 3.0f};
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_p6_x64);
  for (const float a : kAlphas) {
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VUnOpMicrokernelTester().batch_size(n).alpha(a).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
8637
TEST(F32_VELU__AVX2_RR1_P6_X64, beta) {
  TEST_REQUIRES_X86_AVX2;
  // For each beta value, sweep batch sizes from 1 in steps of 63 while <= 320.
  constexpr size_t kTile = 64;
  constexpr float kBetas[] = {0.3f, 3.0f};
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_p6_x64);
  for (const float b : kBetas) {
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VUnOpMicrokernelTester().batch_size(n).beta(b).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
8649 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
8650
8651
8652 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(F32_VELU__AVX2_RR1_P6_X72, batch_eq_72) {
  TEST_REQUIRES_X86_AVX2;
  // One ELU run at batch size 72, the size named by this test and the x72 kernel.
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_p6_x72);
  VUnOpMicrokernelTester().batch_size(72).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
}
8659
TEST(F32_VELU__AVX2_RR1_P6_X72, batch_div_72) {
  TEST_REQUIRES_X86_AVX2;
  // Batch sizes that are multiples of 72: 144, 216, ..., 648.
  constexpr size_t kTile = 72;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_p6_x72);
  for (size_t n = 2 * kTile; n < 10 * kTile; n += kTile) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
8668
TEST(F32_VELU__AVX2_RR1_P6_X72, batch_lt_72) {
  TEST_REQUIRES_X86_AVX2;
  // Every batch size from 1 through 71.
  constexpr size_t kTile = 72;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_p6_x72);
  for (size_t n = 1; n < kTile; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
8677
TEST(F32_VELU__AVX2_RR1_P6_X72, batch_gt_72) {
  TEST_REQUIRES_X86_AVX2;
  // Every batch size from 73 through 143.
  constexpr size_t kTile = 72;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_p6_x72);
  for (size_t n = kTile + 1; n < 2 * kTile; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
8686
TEST(F32_VELU__AVX2_RR1_P6_X72, inplace) {
  TEST_REQUIRES_X86_AVX2;
  // In-place runs: batch sizes start at 1 and step by 71 while <= 360.
  constexpr size_t kTile = 72;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_p6_x72);
  for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
    VUnOpMicrokernelTester().batch_size(n).inplace(true).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
8696
TEST(F32_VELU__AVX2_RR1_P6_X72, prescale) {
  TEST_REQUIRES_X86_AVX2;
  // For each prescale value, sweep batch sizes from 1 in steps of 71 while <= 360.
  constexpr size_t kTile = 72;
  constexpr float kPrescales[] = {0.1f, 10.0f};
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_p6_x72);
  for (const float scale : kPrescales) {
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VUnOpMicrokernelTester().batch_size(n).prescale(scale).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
8708
TEST(F32_VELU__AVX2_RR1_P6_X72, alpha) {
  TEST_REQUIRES_X86_AVX2;
  // For each alpha value, sweep batch sizes from 1 in steps of 71 while <= 360.
  constexpr size_t kTile = 72;
  constexpr float kAlphas[] = {0.3f, 3.0f};
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_p6_x72);
  for (const float a : kAlphas) {
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VUnOpMicrokernelTester().batch_size(n).alpha(a).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
8720
TEST(F32_VELU__AVX2_RR1_P6_X72, beta) {
  TEST_REQUIRES_X86_AVX2;
  // For each beta value, sweep batch sizes from 1 in steps of 71 while <= 360.
  constexpr size_t kTile = 72;
  constexpr float kBetas[] = {0.3f, 3.0f};
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_p6_x72);
  for (const float b : kBetas) {
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VUnOpMicrokernelTester().batch_size(n).beta(b).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
8732 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
8733
8734
8735 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(F32_VELU__AVX2_RR1_P6_X80, batch_eq_80) {
  TEST_REQUIRES_X86_AVX2;
  // One ELU run at batch size 80, the size named by this test and the x80 kernel.
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_p6_x80);
  VUnOpMicrokernelTester().batch_size(80).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
}
8742
TEST(F32_VELU__AVX2_RR1_P6_X80, batch_div_80) {
  TEST_REQUIRES_X86_AVX2;
  // Batch sizes that are multiples of 80: 160, 240, ..., 720.
  constexpr size_t kTile = 80;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_p6_x80);
  for (size_t n = 2 * kTile; n < 10 * kTile; n += kTile) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
8751
TEST(F32_VELU__AVX2_RR1_P6_X80, batch_lt_80) {
  TEST_REQUIRES_X86_AVX2;
  // Every batch size from 1 through 79.
  constexpr size_t kTile = 80;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_p6_x80);
  for (size_t n = 1; n < kTile; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
8760
TEST(F32_VELU__AVX2_RR1_P6_X80, batch_gt_80) {
  TEST_REQUIRES_X86_AVX2;
  // Every batch size from 81 through 159.
  constexpr size_t kTile = 80;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_p6_x80);
  for (size_t n = kTile + 1; n < 2 * kTile; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
8769
TEST(F32_VELU__AVX2_RR1_P6_X80, inplace) {
  TEST_REQUIRES_X86_AVX2;
  // In-place runs: batch sizes start at 1 and step by 79 while <= 400.
  constexpr size_t kTile = 80;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_p6_x80);
  for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
    VUnOpMicrokernelTester().batch_size(n).inplace(true).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
8779
TEST(F32_VELU__AVX2_RR1_P6_X80, prescale) {
  TEST_REQUIRES_X86_AVX2;
  // For each prescale value, sweep batch sizes from 1 in steps of 79 while <= 400.
  constexpr size_t kTile = 80;
  constexpr float kPrescales[] = {0.1f, 10.0f};
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_p6_x80);
  for (const float scale : kPrescales) {
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VUnOpMicrokernelTester().batch_size(n).prescale(scale).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
8791
TEST(F32_VELU__AVX2_RR1_P6_X80, alpha) {
  TEST_REQUIRES_X86_AVX2;
  // For each alpha value, sweep batch sizes from 1 in steps of 79 while <= 400.
  constexpr size_t kTile = 80;
  constexpr float kAlphas[] = {0.3f, 3.0f};
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_p6_x80);
  for (const float a : kAlphas) {
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VUnOpMicrokernelTester().batch_size(n).alpha(a).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
8803
TEST(F32_VELU__AVX2_RR1_P6_X80, beta) {
  TEST_REQUIRES_X86_AVX2;
  // For each beta value, sweep batch sizes from 1 in steps of 79 while <= 400.
  constexpr size_t kTile = 80;
  constexpr float kBetas[] = {0.3f, 3.0f};
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx2_rr1_p6_x80);
  for (const float b : kBetas) {
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VUnOpMicrokernelTester().batch_size(n).beta(b).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
8815 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
8816
8817
8818 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X16, batch_eq_16) {
  TEST_REQUIRES_X86_AVX512F;
  // One ELU run at batch size 16, the size named by this test and the x16 kernel.
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x16);
  VUnOpMicrokernelTester().batch_size(16).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
}
8825
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X16, batch_div_16) {
  TEST_REQUIRES_X86_AVX512F;
  // Batch sizes that are multiples of 16: 32, 48, ..., 144.
  constexpr size_t kTile = 16;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x16);
  for (size_t n = 2 * kTile; n < 10 * kTile; n += kTile) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
8834
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X16, batch_lt_16) {
  TEST_REQUIRES_X86_AVX512F;
  // Every batch size from 1 through 15.
  constexpr size_t kTile = 16;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x16);
  for (size_t n = 1; n < kTile; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
8843
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X16, batch_gt_16) {
  TEST_REQUIRES_X86_AVX512F;
  // Every batch size from 17 through 31.
  constexpr size_t kTile = 16;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x16);
  for (size_t n = kTile + 1; n < 2 * kTile; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
8852
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X16, inplace) {
  TEST_REQUIRES_X86_AVX512F;
  // In-place runs: batch sizes start at 1 and step by 15 while <= 80.
  constexpr size_t kTile = 16;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x16);
  for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
    VUnOpMicrokernelTester().batch_size(n).inplace(true).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
8862
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X16, prescale) {
  TEST_REQUIRES_X86_AVX512F;
  // For each prescale value, sweep batch sizes from 1 in steps of 15 while <= 80.
  constexpr size_t kTile = 16;
  constexpr float kPrescales[] = {0.1f, 10.0f};
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x16);
  for (const float scale : kPrescales) {
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VUnOpMicrokernelTester().batch_size(n).prescale(scale).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
8874
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X16, alpha) {
  TEST_REQUIRES_X86_AVX512F;
  // For each alpha value, sweep batch sizes from 1 in steps of 15 while <= 80.
  constexpr size_t kTile = 16;
  constexpr float kAlphas[] = {0.3f, 3.0f};
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x16);
  for (const float a : kAlphas) {
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VUnOpMicrokernelTester().batch_size(n).alpha(a).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
8886
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X16, beta) {
  TEST_REQUIRES_X86_AVX512F;
  // For each beta value, sweep batch sizes from 1 in steps of 15 while <= 80.
  constexpr size_t kTile = 16;
  constexpr float kBetas[] = {0.3f, 3.0f};
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x16);
  for (const float b : kBetas) {
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VUnOpMicrokernelTester().batch_size(n).beta(b).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
8898 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
8899
8900
8901 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X32, batch_eq_32) {
  TEST_REQUIRES_X86_AVX512F;
  // One ELU run at batch size 32, the size named by this test and the x32 kernel.
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x32);
  VUnOpMicrokernelTester().batch_size(32).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
}
8908
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X32, batch_div_32) {
  TEST_REQUIRES_X86_AVX512F;
  // Batch sizes that are multiples of 32: 64, 96, ..., 288.
  constexpr size_t kTile = 32;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x32);
  for (size_t n = 2 * kTile; n < 10 * kTile; n += kTile) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
8917
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X32, batch_lt_32) {
  TEST_REQUIRES_X86_AVX512F;
  // Every batch size from 1 through 31.
  constexpr size_t kTile = 32;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x32);
  for (size_t n = 1; n < kTile; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
8926
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X32, batch_gt_32) {
  TEST_REQUIRES_X86_AVX512F;
  // Every batch size from 33 through 63.
  constexpr size_t kTile = 32;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x32);
  for (size_t n = kTile + 1; n < 2 * kTile; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
8935
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X32, inplace) {
  TEST_REQUIRES_X86_AVX512F;
  // In-place runs: batch sizes start at 1 and step by 31 while <= 160.
  constexpr size_t kTile = 32;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x32);
  for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
    VUnOpMicrokernelTester().batch_size(n).inplace(true).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
8945
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X32, prescale) {
  TEST_REQUIRES_X86_AVX512F;
  // For each prescale value, sweep batch sizes from 1 in steps of 31 while <= 160.
  constexpr size_t kTile = 32;
  constexpr float kPrescales[] = {0.1f, 10.0f};
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x32);
  for (const float scale : kPrescales) {
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VUnOpMicrokernelTester().batch_size(n).prescale(scale).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
8957
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X32, alpha) {
  TEST_REQUIRES_X86_AVX512F;
  // For each alpha value, sweep batch sizes from 1 in steps of 31 while <= 160.
  constexpr size_t kTile = 32;
  constexpr float kAlphas[] = {0.3f, 3.0f};
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x32);
  for (const float a : kAlphas) {
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VUnOpMicrokernelTester().batch_size(n).alpha(a).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
8969
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X32, beta) {
  TEST_REQUIRES_X86_AVX512F;
  // For each beta value, sweep batch sizes from 1 in steps of 31 while <= 160.
  constexpr size_t kTile = 32;
  constexpr float kBetas[] = {0.3f, 3.0f};
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x32);
  for (const float b : kBetas) {
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VUnOpMicrokernelTester().batch_size(n).beta(b).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
8981 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
8982
8983
8984 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X48, batch_eq_48) {
  TEST_REQUIRES_X86_AVX512F;
  // One ELU run at batch size 48, the size named by this test and the x48 kernel.
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x48);
  VUnOpMicrokernelTester().batch_size(48).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
}
8991
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X48, batch_div_48) {
  TEST_REQUIRES_X86_AVX512F;
  // Batch sizes that are multiples of 48: 96, 144, ..., 432.
  constexpr size_t kTile = 48;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x48);
  for (size_t n = 2 * kTile; n < 10 * kTile; n += kTile) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
9000
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X48, batch_lt_48) {
  TEST_REQUIRES_X86_AVX512F;
  // Every batch size from 1 through 47.
  constexpr size_t kTile = 48;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x48);
  for (size_t n = 1; n < kTile; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
9009
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X48, batch_gt_48) {
  TEST_REQUIRES_X86_AVX512F;
  // Every batch size from 49 through 95.
  constexpr size_t kTile = 48;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x48);
  for (size_t n = kTile + 1; n < 2 * kTile; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
9018
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X48, inplace) {
  TEST_REQUIRES_X86_AVX512F;
  // In-place runs: batch sizes start at 1 and step by 47 while <= 240.
  constexpr size_t kTile = 48;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x48);
  for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
    VUnOpMicrokernelTester().batch_size(n).inplace(true).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
9028
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X48, prescale) {
  TEST_REQUIRES_X86_AVX512F;
  // For each prescale value, sweep batch sizes from 1 in steps of 47 while <= 240.
  constexpr size_t kTile = 48;
  constexpr float kPrescales[] = {0.1f, 10.0f};
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x48);
  for (const float scale : kPrescales) {
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VUnOpMicrokernelTester().batch_size(n).prescale(scale).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
9040
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X48, alpha) {
  TEST_REQUIRES_X86_AVX512F;
  // For each alpha value, sweep batch sizes from 1 in steps of 47 while <= 240.
  constexpr size_t kTile = 48;
  constexpr float kAlphas[] = {0.3f, 3.0f};
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x48);
  for (const float a : kAlphas) {
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VUnOpMicrokernelTester().batch_size(n).alpha(a).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
9052
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X48, beta) {
  TEST_REQUIRES_X86_AVX512F;
  // For each beta value, sweep batch sizes from 1 in steps of 47 while <= 240.
  constexpr size_t kTile = 48;
  constexpr float kBetas[] = {0.3f, 3.0f};
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x48);
  for (const float b : kBetas) {
    for (size_t n = 1; n <= 5 * kTile; n += kTile - 1) {
      VUnOpMicrokernelTester().batch_size(n).beta(b).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
9064 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
9065
9066
9067 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single ELU check at a batch size of exactly 64.
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X64, batch_eq_64) {
  TEST_REQUIRES_X86_AVX512F;
  // Kernel symbol converted to xnn_f32_vunary_ukernel_function before the call.
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x64);
  VUnOpMicrokernelTester()
    .batch_size(64)
    .Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
}
9074
// Runs the ELU check for batch sizes 128, 192, ... in steps of 64, stopping
// before 640.
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X64, batch_div_64) {
  TEST_REQUIRES_X86_AVX512F;
  // Kernel symbol converted once to xnn_f32_vunary_ukernel_function and reused.
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x64);
  for (size_t n = 128; n < 640; n += 64) {
    VUnOpMicrokernelTester()
      .batch_size(n)
      .Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
9083
// Runs the ELU check for every batch size from 1 up to (not including) 64.
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X64, batch_lt_64) {
  TEST_REQUIRES_X86_AVX512F;
  // Kernel symbol converted once to xnn_f32_vunary_ukernel_function and reused.
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x64);
  for (size_t n = 1; n < 64; ++n) {
    VUnOpMicrokernelTester()
      .batch_size(n)
      .Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
9092
// Runs the ELU check for every batch size from 65 up to (not including) 128.
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X64, batch_gt_64) {
  TEST_REQUIRES_X86_AVX512F;
  // Kernel symbol converted once to xnn_f32_vunary_ukernel_function and reused.
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x64);
  for (size_t n = 65; n < 128; ++n) {
    VUnOpMicrokernelTester()
      .batch_size(n)
      .Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
9101
// Runs the ELU check with inplace(true); batch sizes start at 1 and advance
// by 63 while they stay <= 320.
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X64, inplace) {
  TEST_REQUIRES_X86_AVX512F;
  // Kernel symbol converted once to xnn_f32_vunary_ukernel_function and reused.
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x64);
  for (size_t n = 1; n <= 320; n += 63) {
    VUnOpMicrokernelTester()
      .batch_size(n)
      .inplace(true)
      .Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
9111
// Repeats the stepped batch-size sweep (1, +63, <= 320) for each prescale
// value: 0.1 and 10.
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X64, prescale) {
  TEST_REQUIRES_X86_AVX512F;
  // Kernel symbol converted once to xnn_f32_vunary_ukernel_function and reused.
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x64);
  const std::vector<float> prescales = {0.1f, 10.0f};
  for (const float prescale : prescales) {
    for (size_t n = 1; n <= 320; n += 63) {
      VUnOpMicrokernelTester()
        .batch_size(n)
        .prescale(prescale)
        .Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
9123
// Repeats the stepped batch-size sweep (1, +63, <= 320) for each alpha
// value: 0.3 and 3.
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X64, alpha) {
  TEST_REQUIRES_X86_AVX512F;
  // Kernel symbol converted once to xnn_f32_vunary_ukernel_function and reused.
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x64);
  const std::vector<float> alphas = {0.3f, 3.0f};
  for (const float alpha : alphas) {
    for (size_t n = 1; n <= 320; n += 63) {
      VUnOpMicrokernelTester()
        .batch_size(n)
        .alpha(alpha)
        .Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
9135
// Repeats the stepped batch-size sweep (1, +63, <= 320) for each beta
// value: 0.3 and 3.
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X64, beta) {
  TEST_REQUIRES_X86_AVX512F;
  // Kernel symbol converted once to xnn_f32_vunary_ukernel_function and reused.
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x64);
  const std::vector<float> betas = {0.3f, 3.0f};
  for (const float beta : betas) {
    for (size_t n = 1; n <= 320; n += 63) {
      VUnOpMicrokernelTester()
        .batch_size(n)
        .beta(beta)
        .Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
9147 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
9148
9149
9150 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single ELU check at a batch size of exactly 80.
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X80, batch_eq_80) {
  TEST_REQUIRES_X86_AVX512F;
  // Kernel symbol converted to xnn_f32_vunary_ukernel_function before the call.
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x80);
  VUnOpMicrokernelTester()
    .batch_size(80)
    .Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
}
9157
// Runs the ELU check for batch sizes 160, 240, ... in steps of 80, stopping
// before 800.
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X80, batch_div_80) {
  TEST_REQUIRES_X86_AVX512F;
  // Kernel symbol converted once to xnn_f32_vunary_ukernel_function and reused.
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x80);
  for (size_t n = 160; n < 800; n += 80) {
    VUnOpMicrokernelTester()
      .batch_size(n)
      .Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
9166
// Runs the ELU check for every batch size from 1 up to (not including) 80.
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X80, batch_lt_80) {
  TEST_REQUIRES_X86_AVX512F;
  // Kernel symbol converted once to xnn_f32_vunary_ukernel_function and reused.
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x80);
  for (size_t n = 1; n < 80; ++n) {
    VUnOpMicrokernelTester()
      .batch_size(n)
      .Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
9175
// Runs the ELU check for every batch size from 81 up to (not including) 160.
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X80, batch_gt_80) {
  TEST_REQUIRES_X86_AVX512F;
  // Kernel symbol converted once to xnn_f32_vunary_ukernel_function and reused.
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x80);
  for (size_t n = 81; n < 160; ++n) {
    VUnOpMicrokernelTester()
      .batch_size(n)
      .Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
9184
// Runs the ELU check with inplace(true); batch sizes start at 1 and advance
// by 79 while they stay <= 400.
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X80, inplace) {
  TEST_REQUIRES_X86_AVX512F;
  // Kernel symbol converted once to xnn_f32_vunary_ukernel_function and reused.
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x80);
  for (size_t n = 1; n <= 400; n += 79) {
    VUnOpMicrokernelTester()
      .batch_size(n)
      .inplace(true)
      .Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
9194
// Repeats the stepped batch-size sweep (1, +79, <= 400) for each prescale
// value: 0.1 and 10.
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X80, prescale) {
  TEST_REQUIRES_X86_AVX512F;
  // Kernel symbol converted once to xnn_f32_vunary_ukernel_function and reused.
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x80);
  const std::vector<float> prescales = {0.1f, 10.0f};
  for (const float prescale : prescales) {
    for (size_t n = 1; n <= 400; n += 79) {
      VUnOpMicrokernelTester()
        .batch_size(n)
        .prescale(prescale)
        .Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
9206
// Repeats the stepped batch-size sweep (1, +79, <= 400) for each alpha
// value: 0.3 and 3.
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X80, alpha) {
  TEST_REQUIRES_X86_AVX512F;
  // Kernel symbol converted once to xnn_f32_vunary_ukernel_function and reused.
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x80);
  const std::vector<float> alphas = {0.3f, 3.0f};
  for (const float alpha : alphas) {
    for (size_t n = 1; n <= 400; n += 79) {
      VUnOpMicrokernelTester()
        .batch_size(n)
        .alpha(alpha)
        .Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
9218
// Repeats the stepped batch-size sweep (1, +79, <= 400) for each beta
// value: 0.3 and 3.
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X80, beta) {
  TEST_REQUIRES_X86_AVX512F;
  // Kernel symbol converted once to xnn_f32_vunary_ukernel_function and reused.
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x80);
  const std::vector<float> betas = {0.3f, 3.0f};
  for (const float beta : betas) {
    for (size_t n = 1; n <= 400; n += 79) {
      VUnOpMicrokernelTester()
        .batch_size(n)
        .beta(beta)
        .Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
9230 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
9231
9232
9233 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single ELU check at a batch size of exactly 96.
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X96, batch_eq_96) {
  TEST_REQUIRES_X86_AVX512F;
  // Kernel symbol converted to xnn_f32_vunary_ukernel_function before the call.
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x96);
  VUnOpMicrokernelTester()
    .batch_size(96)
    .Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
}
9240
// Runs the ELU check for batch sizes 192, 288, ... in steps of 96, stopping
// before 960.
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X96, batch_div_96) {
  TEST_REQUIRES_X86_AVX512F;
  // Kernel symbol converted once to xnn_f32_vunary_ukernel_function and reused.
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x96);
  for (size_t n = 192; n < 960; n += 96) {
    VUnOpMicrokernelTester()
      .batch_size(n)
      .Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
9249
// Runs the ELU check for every batch size from 1 up to (not including) 96.
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X96, batch_lt_96) {
  TEST_REQUIRES_X86_AVX512F;
  // Kernel symbol converted once to xnn_f32_vunary_ukernel_function and reused.
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x96);
  for (size_t n = 1; n < 96; ++n) {
    VUnOpMicrokernelTester()
      .batch_size(n)
      .Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
9258
// Runs the ELU check for every batch size from 97 up to (not including) 192.
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X96, batch_gt_96) {
  TEST_REQUIRES_X86_AVX512F;
  // Kernel symbol converted once to xnn_f32_vunary_ukernel_function and reused.
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x96);
  for (size_t n = 97; n < 192; ++n) {
    VUnOpMicrokernelTester()
      .batch_size(n)
      .Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
9267
// Runs the ELU check with inplace(true); batch sizes start at 1 and advance
// by 95 while they stay <= 480.
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X96, inplace) {
  TEST_REQUIRES_X86_AVX512F;
  // Kernel symbol converted once to xnn_f32_vunary_ukernel_function and reused.
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x96);
  for (size_t n = 1; n <= 480; n += 95) {
    VUnOpMicrokernelTester()
      .batch_size(n)
      .inplace(true)
      .Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
9277
// Repeats the stepped batch-size sweep (1, +95, <= 480) for each prescale
// value: 0.1 and 10.
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X96, prescale) {
  TEST_REQUIRES_X86_AVX512F;
  // Kernel symbol converted once to xnn_f32_vunary_ukernel_function and reused.
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x96);
  const std::vector<float> prescales = {0.1f, 10.0f};
  for (const float prescale : prescales) {
    for (size_t n = 1; n <= 480; n += 95) {
      VUnOpMicrokernelTester()
        .batch_size(n)
        .prescale(prescale)
        .Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
9289
// Repeats the stepped batch-size sweep (1, +95, <= 480) for each alpha
// value: 0.3 and 3.
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X96, alpha) {
  TEST_REQUIRES_X86_AVX512F;
  // Kernel symbol converted once to xnn_f32_vunary_ukernel_function and reused.
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x96);
  const std::vector<float> alphas = {0.3f, 3.0f};
  for (const float alpha : alphas) {
    for (size_t n = 1; n <= 480; n += 95) {
      VUnOpMicrokernelTester()
        .batch_size(n)
        .alpha(alpha)
        .Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
9301
// Repeats the stepped batch-size sweep (1, +95, <= 480) for each beta
// value: 0.3 and 3.
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X96, beta) {
  TEST_REQUIRES_X86_AVX512F;
  // Kernel symbol converted once to xnn_f32_vunary_ukernel_function and reused.
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x96);
  const std::vector<float> betas = {0.3f, 3.0f};
  for (const float beta : betas) {
    for (size_t n = 1; n <= 480; n += 95) {
      VUnOpMicrokernelTester()
        .batch_size(n)
        .beta(beta)
        .Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
9313 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
9314
9315
9316 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single ELU check at a batch size of exactly 112.
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X112, batch_eq_112) {
  TEST_REQUIRES_X86_AVX512F;
  // Kernel symbol converted to xnn_f32_vunary_ukernel_function before the call.
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x112);
  VUnOpMicrokernelTester()
    .batch_size(112)
    .Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
}
9323
// Runs the ELU check for batch sizes 224, 336, ... in steps of 112, stopping
// before 1120.
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X112, batch_div_112) {
  TEST_REQUIRES_X86_AVX512F;
  // Kernel symbol converted once to xnn_f32_vunary_ukernel_function and reused.
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x112);
  for (size_t n = 224; n < 1120; n += 112) {
    VUnOpMicrokernelTester()
      .batch_size(n)
      .Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
9332
// Runs the ELU check for every batch size from 1 up to (not including) 112.
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X112, batch_lt_112) {
  TEST_REQUIRES_X86_AVX512F;
  // Kernel symbol converted once to xnn_f32_vunary_ukernel_function and reused.
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x112);
  for (size_t n = 1; n < 112; ++n) {
    VUnOpMicrokernelTester()
      .batch_size(n)
      .Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
9341
// Runs the ELU check for every batch size from 113 up to (not including) 224.
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X112, batch_gt_112) {
  TEST_REQUIRES_X86_AVX512F;
  // Kernel symbol converted once to xnn_f32_vunary_ukernel_function and reused.
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x112);
  for (size_t n = 113; n < 224; ++n) {
    VUnOpMicrokernelTester()
      .batch_size(n)
      .Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
9350
// Runs the ELU check with inplace(true); batch sizes start at 1 and advance
// by 111 while they stay <= 560.
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X112, inplace) {
  TEST_REQUIRES_X86_AVX512F;
  // Kernel symbol converted once to xnn_f32_vunary_ukernel_function and reused.
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x112);
  for (size_t n = 1; n <= 560; n += 111) {
    VUnOpMicrokernelTester()
      .batch_size(n)
      .inplace(true)
      .Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
9360
// Repeats the stepped batch-size sweep (1, +111, <= 560) for each prescale
// value: 0.1 and 10.
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X112, prescale) {
  TEST_REQUIRES_X86_AVX512F;
  // Kernel symbol converted once to xnn_f32_vunary_ukernel_function and reused.
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x112);
  const std::vector<float> prescales = {0.1f, 10.0f};
  for (const float prescale : prescales) {
    for (size_t n = 1; n <= 560; n += 111) {
      VUnOpMicrokernelTester()
        .batch_size(n)
        .prescale(prescale)
        .Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
9372
// Repeats the stepped batch-size sweep (1, +111, <= 560) for each alpha
// value: 0.3 and 3.
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X112, alpha) {
  TEST_REQUIRES_X86_AVX512F;
  // Kernel symbol converted once to xnn_f32_vunary_ukernel_function and reused.
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x112);
  const std::vector<float> alphas = {0.3f, 3.0f};
  for (const float alpha : alphas) {
    for (size_t n = 1; n <= 560; n += 111) {
      VUnOpMicrokernelTester()
        .batch_size(n)
        .alpha(alpha)
        .Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
9384
// Repeats the stepped batch-size sweep (1, +111, <= 560) for each beta
// value: 0.3 and 3.
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X112, beta) {
  TEST_REQUIRES_X86_AVX512F;
  // Kernel symbol converted once to xnn_f32_vunary_ukernel_function and reused.
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x112);
  const std::vector<float> betas = {0.3f, 3.0f};
  for (const float beta : betas) {
    for (size_t n = 1; n <= 560; n += 111) {
      VUnOpMicrokernelTester()
        .batch_size(n)
        .beta(beta)
        .Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
9396 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
9397
9398
9399 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single ELU check at a batch size of exactly 128.
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X128, batch_eq_128) {
  TEST_REQUIRES_X86_AVX512F;
  // Kernel symbol converted to xnn_f32_vunary_ukernel_function before the call.
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x128);
  VUnOpMicrokernelTester()
    .batch_size(128)
    .Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
}
9406
// Runs the ELU check for batch sizes 256, 384, ... in steps of 128, stopping
// before 1280.
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X128, batch_div_128) {
  TEST_REQUIRES_X86_AVX512F;
  // Kernel symbol converted once to xnn_f32_vunary_ukernel_function and reused.
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x128);
  for (size_t n = 256; n < 1280; n += 128) {
    VUnOpMicrokernelTester()
      .batch_size(n)
      .Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
9415
// Runs the ELU check for every batch size from 1 up to (not including) 128.
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X128, batch_lt_128) {
  TEST_REQUIRES_X86_AVX512F;
  // Kernel symbol converted once to xnn_f32_vunary_ukernel_function and reused.
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x128);
  for (size_t n = 1; n < 128; ++n) {
    VUnOpMicrokernelTester()
      .batch_size(n)
      .Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
9424
// Runs the ELU check for every batch size from 129 up to (not including) 256.
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X128, batch_gt_128) {
  TEST_REQUIRES_X86_AVX512F;
  // Kernel symbol converted once to xnn_f32_vunary_ukernel_function and reused.
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x128);
  for (size_t n = 129; n < 256; ++n) {
    VUnOpMicrokernelTester()
      .batch_size(n)
      .Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
9433
// Runs the ELU check with inplace(true); batch sizes start at 1 and advance
// by 127 while they stay <= 640.
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X128, inplace) {
  TEST_REQUIRES_X86_AVX512F;
  // Kernel symbol converted once to xnn_f32_vunary_ukernel_function and reused.
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x128);
  for (size_t n = 1; n <= 640; n += 127) {
    VUnOpMicrokernelTester()
      .batch_size(n)
      .inplace(true)
      .Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
9443
// Repeats the stepped batch-size sweep (1, +127, <= 640) for each prescale
// value: 0.1 and 10.
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X128, prescale) {
  TEST_REQUIRES_X86_AVX512F;
  // Kernel symbol converted once to xnn_f32_vunary_ukernel_function and reused.
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x128);
  const std::vector<float> prescales = {0.1f, 10.0f};
  for (const float prescale : prescales) {
    for (size_t n = 1; n <= 640; n += 127) {
      VUnOpMicrokernelTester()
        .batch_size(n)
        .prescale(prescale)
        .Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
9455
// Repeats the stepped batch-size sweep (1, +127, <= 640) for each alpha
// value: 0.3 and 3.
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X128, alpha) {
  TEST_REQUIRES_X86_AVX512F;
  // Kernel symbol converted once to xnn_f32_vunary_ukernel_function and reused.
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x128);
  const std::vector<float> alphas = {0.3f, 3.0f};
  for (const float alpha : alphas) {
    for (size_t n = 1; n <= 640; n += 127) {
      VUnOpMicrokernelTester()
        .batch_size(n)
        .alpha(alpha)
        .Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
9467
// Repeats the stepped batch-size sweep (1, +127, <= 640) for each beta
// value: 0.3 and 3.
TEST(F32_VELU__AVX512F_RR1_LUT16_P3_PERM_X128, beta) {
  TEST_REQUIRES_X86_AVX512F;
  // Kernel symbol converted once to xnn_f32_vunary_ukernel_function and reused.
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x128);
  const std::vector<float> betas = {0.3f, 3.0f};
  for (const float beta : betas) {
    for (size_t n = 1; n <= 640; n += 127) {
      VUnOpMicrokernelTester()
        .batch_size(n)
        .beta(beta)
        .Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
9479 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
9480
9481
9482 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single ELU check at a batch size of exactly 16.
TEST(F32_VELU__AVX512F_RR1_P6_X16, batch_eq_16) {
  TEST_REQUIRES_X86_AVX512F;
  // Kernel symbol converted to xnn_f32_vunary_ukernel_function before the call.
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx512f_rr1_p6_x16);
  VUnOpMicrokernelTester()
    .batch_size(16)
    .Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
}
9489
// Runs the ELU check for batch sizes 32, 48, ... in steps of 16, stopping
// before 160.
TEST(F32_VELU__AVX512F_RR1_P6_X16, batch_div_16) {
  TEST_REQUIRES_X86_AVX512F;
  // Kernel symbol converted once to xnn_f32_vunary_ukernel_function and reused.
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx512f_rr1_p6_x16);
  for (size_t n = 32; n < 160; n += 16) {
    VUnOpMicrokernelTester()
      .batch_size(n)
      .Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
9498
// Runs the ELU check for every batch size from 1 up to (not including) 16.
TEST(F32_VELU__AVX512F_RR1_P6_X16, batch_lt_16) {
  TEST_REQUIRES_X86_AVX512F;
  // Kernel symbol converted once to xnn_f32_vunary_ukernel_function and reused.
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx512f_rr1_p6_x16);
  for (size_t n = 1; n < 16; ++n) {
    VUnOpMicrokernelTester()
      .batch_size(n)
      .Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
9507
// Runs the ELU check for every batch size from 17 up to (not including) 32.
TEST(F32_VELU__AVX512F_RR1_P6_X16, batch_gt_16) {
  TEST_REQUIRES_X86_AVX512F;
  // Kernel symbol converted once to xnn_f32_vunary_ukernel_function and reused.
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx512f_rr1_p6_x16);
  for (size_t n = 17; n < 32; ++n) {
    VUnOpMicrokernelTester()
      .batch_size(n)
      .Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
9516
// Runs the ELU check with inplace(true); batch sizes start at 1 and advance
// by 15 while they stay <= 80.
TEST(F32_VELU__AVX512F_RR1_P6_X16, inplace) {
  TEST_REQUIRES_X86_AVX512F;
  // Kernel symbol converted once to xnn_f32_vunary_ukernel_function and reused.
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx512f_rr1_p6_x16);
  for (size_t n = 1; n <= 80; n += 15) {
    VUnOpMicrokernelTester()
      .batch_size(n)
      .inplace(true)
      .Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
9526
// Repeats the stepped batch-size sweep (1, +15, <= 80) for each prescale
// value: 0.1 and 10.
TEST(F32_VELU__AVX512F_RR1_P6_X16, prescale) {
  TEST_REQUIRES_X86_AVX512F;
  // Kernel symbol converted once to xnn_f32_vunary_ukernel_function and reused.
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx512f_rr1_p6_x16);
  const std::vector<float> prescales = {0.1f, 10.0f};
  for (const float prescale : prescales) {
    for (size_t n = 1; n <= 80; n += 15) {
      VUnOpMicrokernelTester()
        .batch_size(n)
        .prescale(prescale)
        .Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
9538
// Repeats the stepped batch-size sweep (1, +15, <= 80) for each alpha
// value: 0.3 and 3.
TEST(F32_VELU__AVX512F_RR1_P6_X16, alpha) {
  TEST_REQUIRES_X86_AVX512F;
  // Kernel symbol converted once to xnn_f32_vunary_ukernel_function and reused.
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx512f_rr1_p6_x16);
  const std::vector<float> alphas = {0.3f, 3.0f};
  for (const float alpha : alphas) {
    for (size_t n = 1; n <= 80; n += 15) {
      VUnOpMicrokernelTester()
        .batch_size(n)
        .alpha(alpha)
        .Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
9550
// Repeats the stepped batch-size sweep (1, +15, <= 80) for each beta
// value: 0.3 and 3.
TEST(F32_VELU__AVX512F_RR1_P6_X16, beta) {
  TEST_REQUIRES_X86_AVX512F;
  // Kernel symbol converted once to xnn_f32_vunary_ukernel_function and reused.
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx512f_rr1_p6_x16);
  const std::vector<float> betas = {0.3f, 3.0f};
  for (const float beta : betas) {
    for (size_t n = 1; n <= 80; n += 15) {
      VUnOpMicrokernelTester()
        .batch_size(n)
        .beta(beta)
        .Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
9562 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
9563
9564
9565 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single ELU check at a batch size of exactly 32.
TEST(F32_VELU__AVX512F_RR1_P6_X32, batch_eq_32) {
  TEST_REQUIRES_X86_AVX512F;
  // Kernel symbol converted to xnn_f32_vunary_ukernel_function before the call.
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx512f_rr1_p6_x32);
  VUnOpMicrokernelTester()
    .batch_size(32)
    .Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
}
9572
// Runs the ELU check for batch sizes 64, 96, ... in steps of 32, stopping
// before 320.
TEST(F32_VELU__AVX512F_RR1_P6_X32, batch_div_32) {
  TEST_REQUIRES_X86_AVX512F;
  // Kernel symbol converted once to xnn_f32_vunary_ukernel_function and reused.
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx512f_rr1_p6_x32);
  for (size_t n = 64; n < 320; n += 32) {
    VUnOpMicrokernelTester()
      .batch_size(n)
      .Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
9581
// Runs the ELU check for every batch size from 1 up to (not including) 32.
TEST(F32_VELU__AVX512F_RR1_P6_X32, batch_lt_32) {
  TEST_REQUIRES_X86_AVX512F;
  // Kernel symbol converted once to xnn_f32_vunary_ukernel_function and reused.
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx512f_rr1_p6_x32);
  for (size_t n = 1; n < 32; ++n) {
    VUnOpMicrokernelTester()
      .batch_size(n)
      .Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
9590
// Runs the ELU check for every batch size from 33 up to (not including) 64.
TEST(F32_VELU__AVX512F_RR1_P6_X32, batch_gt_32) {
  TEST_REQUIRES_X86_AVX512F;
  // Kernel symbol converted once to xnn_f32_vunary_ukernel_function and reused.
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx512f_rr1_p6_x32);
  for (size_t n = 33; n < 64; ++n) {
    VUnOpMicrokernelTester()
      .batch_size(n)
      .Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
9599
// Runs the ELU check with inplace(true); batch sizes start at 1 and advance
// by 31 while they stay <= 160.
TEST(F32_VELU__AVX512F_RR1_P6_X32, inplace) {
  TEST_REQUIRES_X86_AVX512F;
  // Kernel symbol converted once to xnn_f32_vunary_ukernel_function and reused.
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx512f_rr1_p6_x32);
  for (size_t n = 1; n <= 160; n += 31) {
    VUnOpMicrokernelTester()
      .batch_size(n)
      .inplace(true)
      .Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
9609
// Repeats the stepped batch-size sweep (1, +31, <= 160) for each prescale
// value: 0.1 and 10.
TEST(F32_VELU__AVX512F_RR1_P6_X32, prescale) {
  TEST_REQUIRES_X86_AVX512F;
  // Kernel symbol converted once to xnn_f32_vunary_ukernel_function and reused.
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx512f_rr1_p6_x32);
  const std::vector<float> prescales = {0.1f, 10.0f};
  for (const float prescale : prescales) {
    for (size_t n = 1; n <= 160; n += 31) {
      VUnOpMicrokernelTester()
        .batch_size(n)
        .prescale(prescale)
        .Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
9621
// Repeats the stepped batch-size sweep (1, +31, <= 160) for each alpha
// value: 0.3 and 3.
TEST(F32_VELU__AVX512F_RR1_P6_X32, alpha) {
  TEST_REQUIRES_X86_AVX512F;
  // Kernel symbol converted once to xnn_f32_vunary_ukernel_function and reused.
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx512f_rr1_p6_x32);
  const std::vector<float> alphas = {0.3f, 3.0f};
  for (const float alpha : alphas) {
    for (size_t n = 1; n <= 160; n += 31) {
      VUnOpMicrokernelTester()
        .batch_size(n)
        .alpha(alpha)
        .Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
9633
// Repeats the stepped batch-size sweep (1, +31, <= 160) for each beta
// value: 0.3 and 3.
TEST(F32_VELU__AVX512F_RR1_P6_X32, beta) {
  TEST_REQUIRES_X86_AVX512F;
  // Kernel symbol converted once to xnn_f32_vunary_ukernel_function and reused.
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx512f_rr1_p6_x32);
  const std::vector<float> betas = {0.3f, 3.0f};
  for (const float beta : betas) {
    for (size_t n = 1; n <= 160; n += 31) {
      VUnOpMicrokernelTester()
        .batch_size(n)
        .beta(beta)
        .Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
9645 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
9646
9647
9648 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single ELU check at a batch size of exactly 48.
TEST(F32_VELU__AVX512F_RR1_P6_X48, batch_eq_48) {
  TEST_REQUIRES_X86_AVX512F;
  // Kernel symbol converted to xnn_f32_vunary_ukernel_function before the call.
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx512f_rr1_p6_x48);
  VUnOpMicrokernelTester()
    .batch_size(48)
    .Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
}
9655
// Runs the ELU check for batch sizes 96, 144, ... in steps of 48, stopping
// before 480.
TEST(F32_VELU__AVX512F_RR1_P6_X48, batch_div_48) {
  TEST_REQUIRES_X86_AVX512F;
  // Kernel symbol converted once to xnn_f32_vunary_ukernel_function and reused.
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx512f_rr1_p6_x48);
  for (size_t n = 96; n < 480; n += 48) {
    VUnOpMicrokernelTester()
      .batch_size(n)
      .Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
9664
// Runs the ELU check for every batch size from 1 up to (not including) 48.
TEST(F32_VELU__AVX512F_RR1_P6_X48, batch_lt_48) {
  TEST_REQUIRES_X86_AVX512F;
  // Kernel symbol converted once to xnn_f32_vunary_ukernel_function and reused.
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx512f_rr1_p6_x48);
  for (size_t n = 1; n < 48; ++n) {
    VUnOpMicrokernelTester()
      .batch_size(n)
      .Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
9673
// Runs the ELU check for every batch size from 49 up to (not including) 96.
TEST(F32_VELU__AVX512F_RR1_P6_X48, batch_gt_48) {
  TEST_REQUIRES_X86_AVX512F;
  // Kernel symbol converted once to xnn_f32_vunary_ukernel_function and reused.
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx512f_rr1_p6_x48);
  for (size_t n = 49; n < 96; ++n) {
    VUnOpMicrokernelTester()
      .batch_size(n)
      .Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
9682
// Runs the ELU check with inplace(true); batch sizes start at 1 and advance
// by 47 while they stay <= 240.
TEST(F32_VELU__AVX512F_RR1_P6_X48, inplace) {
  TEST_REQUIRES_X86_AVX512F;
  // Kernel symbol converted once to xnn_f32_vunary_ukernel_function and reused.
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx512f_rr1_p6_x48);
  for (size_t n = 1; n <= 240; n += 47) {
    VUnOpMicrokernelTester()
      .batch_size(n)
      .inplace(true)
      .Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
9692
// Repeats the stepped batch-size sweep (1, +47, <= 240) for each prescale
// value: 0.1 and 10.
TEST(F32_VELU__AVX512F_RR1_P6_X48, prescale) {
  TEST_REQUIRES_X86_AVX512F;
  // Kernel symbol converted once to xnn_f32_vunary_ukernel_function and reused.
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx512f_rr1_p6_x48);
  const std::vector<float> prescales = {0.1f, 10.0f};
  for (const float prescale : prescales) {
    for (size_t n = 1; n <= 240; n += 47) {
      VUnOpMicrokernelTester()
        .batch_size(n)
        .prescale(prescale)
        .Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
9704
// Repeats the stepped batch-size sweep (1, +47, <= 240) for each alpha
// value: 0.3 and 3.
TEST(F32_VELU__AVX512F_RR1_P6_X48, alpha) {
  TEST_REQUIRES_X86_AVX512F;
  // Kernel symbol converted once to xnn_f32_vunary_ukernel_function and reused.
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx512f_rr1_p6_x48);
  const std::vector<float> alphas = {0.3f, 3.0f};
  for (const float alpha : alphas) {
    for (size_t n = 1; n <= 240; n += 47) {
      VUnOpMicrokernelTester()
        .batch_size(n)
        .alpha(alpha)
        .Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
9716
// Repeats the stepped batch-size sweep (1, +47, <= 240) for each beta
// value: 0.3 and 3.
TEST(F32_VELU__AVX512F_RR1_P6_X48, beta) {
  TEST_REQUIRES_X86_AVX512F;
  // Kernel symbol converted once to xnn_f32_vunary_ukernel_function and reused.
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx512f_rr1_p6_x48);
  const std::vector<float> betas = {0.3f, 3.0f};
  for (const float beta : betas) {
    for (size_t n = 1; n <= 240; n += 47) {
      VUnOpMicrokernelTester()
        .batch_size(n)
        .beta(beta)
        .Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
9728 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
9729
9730
9731 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Single ELU check at a batch size of exactly 64.
TEST(F32_VELU__AVX512F_RR1_P6_X64, batch_eq_64) {
  TEST_REQUIRES_X86_AVX512F;
  // Kernel symbol converted to xnn_f32_vunary_ukernel_function before the call.
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx512f_rr1_p6_x64);
  VUnOpMicrokernelTester()
    .batch_size(64)
    .Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
}
9738
// Runs the ELU check for batch sizes 128, 192, ... in steps of 64, stopping
// before 640.
TEST(F32_VELU__AVX512F_RR1_P6_X64, batch_div_64) {
  TEST_REQUIRES_X86_AVX512F;
  // Kernel symbol converted once to xnn_f32_vunary_ukernel_function and reused.
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx512f_rr1_p6_x64);
  for (size_t n = 128; n < 640; n += 64) {
    VUnOpMicrokernelTester()
      .batch_size(n)
      .Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
9747
// Runs the ELU check for every batch size from 1 up to (not including) 64.
TEST(F32_VELU__AVX512F_RR1_P6_X64, batch_lt_64) {
  TEST_REQUIRES_X86_AVX512F;
  // Kernel symbol converted once to xnn_f32_vunary_ukernel_function and reused.
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx512f_rr1_p6_x64);
  for (size_t n = 1; n < 64; ++n) {
    VUnOpMicrokernelTester()
      .batch_size(n)
      .Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
9756
// Runs the ELU check for every batch size from 65 up to (not including) 128.
TEST(F32_VELU__AVX512F_RR1_P6_X64, batch_gt_64) {
  TEST_REQUIRES_X86_AVX512F;
  // Kernel symbol converted once to xnn_f32_vunary_ukernel_function and reused.
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx512f_rr1_p6_x64);
  for (size_t n = 65; n < 128; ++n) {
    VUnOpMicrokernelTester()
      .batch_size(n)
      .Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
9765
// Runs the ELU check with inplace(true); batch sizes start at 1 and advance
// by 63 while they stay <= 320.
TEST(F32_VELU__AVX512F_RR1_P6_X64, inplace) {
  TEST_REQUIRES_X86_AVX512F;
  // Kernel symbol converted once to xnn_f32_vunary_ukernel_function and reused.
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx512f_rr1_p6_x64);
  for (size_t n = 1; n <= 320; n += 63) {
    VUnOpMicrokernelTester()
      .batch_size(n)
      .inplace(true)
      .Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
9775
// Repeats the stepped batch-size sweep (1, +63, <= 320) for each prescale
// value: 0.1 and 10.
TEST(F32_VELU__AVX512F_RR1_P6_X64, prescale) {
  TEST_REQUIRES_X86_AVX512F;
  // Kernel symbol converted once to xnn_f32_vunary_ukernel_function and reused.
  const auto kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx512f_rr1_p6_x64);
  const std::vector<float> prescales = {0.1f, 10.0f};
  for (const float prescale : prescales) {
    for (size_t n = 1; n <= 320; n += 63) {
      VUnOpMicrokernelTester()
        .batch_size(n)
        .prescale(prescale)
        .Test(kernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
9787
// Runs the ELU tester with alpha 0.3 and then 3.0, each over batch sizes
// 1, 64, 127, ... (step 63) that do not exceed 320.
TEST(F32_VELU__AVX512F_RR1_P6_X64, alpha) {
  TEST_REQUIRES_X86_AVX512F;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx512f_rr1_p6_x64);
  const float alphas[] = {0.3f, 3.0f};
  for (const float value : alphas) {
    for (size_t n = 1; n <= 320; n += 63) {
      auto tester = VUnOpMicrokernelTester();
      tester.batch_size(n);
      tester.alpha(value);
      tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
9799
// Runs the ELU tester with beta 0.3 and then 3.0, each over batch sizes
// 1, 64, 127, ... (step 63) that do not exceed 320.
TEST(F32_VELU__AVX512F_RR1_P6_X64, beta) {
  TEST_REQUIRES_X86_AVX512F;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx512f_rr1_p6_x64);
  const float betas[] = {0.3f, 3.0f};
  for (const float value : betas) {
    for (size_t n = 1; n <= 320; n += 63) {
      auto tester = VUnOpMicrokernelTester();
      tester.batch_size(n);
      tester.beta(value);
      tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
9811 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
9812
9813
9814 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the ELU tester once with a batch of exactly 80 elements.
TEST(F32_VELU__AVX512F_RR1_P6_X80, batch_eq_80) {
  TEST_REQUIRES_X86_AVX512F;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx512f_rr1_p6_x80);
  auto tester = VUnOpMicrokernelTester();
  tester.batch_size(80);
  tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
}
9821
// Runs the ELU tester on batch sizes 160 through 720, stepping by 80.
TEST(F32_VELU__AVX512F_RR1_P6_X80, batch_div_80) {
  TEST_REQUIRES_X86_AVX512F;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx512f_rr1_p6_x80);
  for (size_t multiple = 2; multiple < 10; ++multiple) {
    auto tester = VUnOpMicrokernelTester();
    tester.batch_size(multiple * 80);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
9830
// Runs the ELU tester for every batch size from 1 to 79.
TEST(F32_VELU__AVX512F_RR1_P6_X80, batch_lt_80) {
  TEST_REQUIRES_X86_AVX512F;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx512f_rr1_p6_x80);
  for (size_t n = 1; n < 80; ++n) {
    auto tester = VUnOpMicrokernelTester();
    tester.batch_size(n);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
9839
// Runs the ELU tester for every batch size from 81 to 159.
TEST(F32_VELU__AVX512F_RR1_P6_X80, batch_gt_80) {
  TEST_REQUIRES_X86_AVX512F;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx512f_rr1_p6_x80);
  for (size_t n = 81; n < 160; ++n) {
    auto tester = VUnOpMicrokernelTester();
    tester.batch_size(n);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
9848
// Runs the ELU tester with inplace(true) on batch sizes 1, 80, 159, ...
// (step 79) that do not exceed 400.
TEST(F32_VELU__AVX512F_RR1_P6_X80, inplace) {
  TEST_REQUIRES_X86_AVX512F;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx512f_rr1_p6_x80);
  for (size_t n = 1; n <= 400; n += 79) {
    auto tester = VUnOpMicrokernelTester();
    tester.batch_size(n);
    tester.inplace(true);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
9858
// Runs the ELU tester with prescale 0.1 and then 10.0, each over batch sizes
// 1, 80, 159, ... (step 79) that do not exceed 400.
TEST(F32_VELU__AVX512F_RR1_P6_X80, prescale) {
  TEST_REQUIRES_X86_AVX512F;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx512f_rr1_p6_x80);
  const float prescales[] = {0.1f, 10.0f};
  for (const float value : prescales) {
    for (size_t n = 1; n <= 400; n += 79) {
      auto tester = VUnOpMicrokernelTester();
      tester.batch_size(n);
      tester.prescale(value);
      tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
9870
// Runs the ELU tester with alpha 0.3 and then 3.0, each over batch sizes
// 1, 80, 159, ... (step 79) that do not exceed 400.
TEST(F32_VELU__AVX512F_RR1_P6_X80, alpha) {
  TEST_REQUIRES_X86_AVX512F;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx512f_rr1_p6_x80);
  const float alphas[] = {0.3f, 3.0f};
  for (const float value : alphas) {
    for (size_t n = 1; n <= 400; n += 79) {
      auto tester = VUnOpMicrokernelTester();
      tester.batch_size(n);
      tester.alpha(value);
      tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
9882
// Runs the ELU tester with beta 0.3 and then 3.0, each over batch sizes
// 1, 80, 159, ... (step 79) that do not exceed 400.
TEST(F32_VELU__AVX512F_RR1_P6_X80, beta) {
  TEST_REQUIRES_X86_AVX512F;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx512f_rr1_p6_x80);
  const float betas[] = {0.3f, 3.0f};
  for (const float value : betas) {
    for (size_t n = 1; n <= 400; n += 79) {
      auto tester = VUnOpMicrokernelTester();
      tester.batch_size(n);
      tester.beta(value);
      tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
9894 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
9895
9896
9897 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the ELU tester once with a batch of exactly 96 elements.
TEST(F32_VELU__AVX512F_RR1_P6_X96, batch_eq_96) {
  TEST_REQUIRES_X86_AVX512F;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx512f_rr1_p6_x96);
  auto tester = VUnOpMicrokernelTester();
  tester.batch_size(96);
  tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
}
9904
// Runs the ELU tester on batch sizes 192 through 864, stepping by 96.
TEST(F32_VELU__AVX512F_RR1_P6_X96, batch_div_96) {
  TEST_REQUIRES_X86_AVX512F;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx512f_rr1_p6_x96);
  for (size_t multiple = 2; multiple < 10; ++multiple) {
    auto tester = VUnOpMicrokernelTester();
    tester.batch_size(multiple * 96);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
9913
// Runs the ELU tester for every batch size from 1 to 95.
TEST(F32_VELU__AVX512F_RR1_P6_X96, batch_lt_96) {
  TEST_REQUIRES_X86_AVX512F;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx512f_rr1_p6_x96);
  for (size_t n = 1; n < 96; ++n) {
    auto tester = VUnOpMicrokernelTester();
    tester.batch_size(n);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
9922
// Runs the ELU tester for every batch size from 97 to 191.
TEST(F32_VELU__AVX512F_RR1_P6_X96, batch_gt_96) {
  TEST_REQUIRES_X86_AVX512F;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx512f_rr1_p6_x96);
  for (size_t n = 97; n < 192; ++n) {
    auto tester = VUnOpMicrokernelTester();
    tester.batch_size(n);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
9931
// Runs the ELU tester with inplace(true) on batch sizes 1, 96, 191, ...
// (step 95) that do not exceed 480.
TEST(F32_VELU__AVX512F_RR1_P6_X96, inplace) {
  TEST_REQUIRES_X86_AVX512F;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx512f_rr1_p6_x96);
  for (size_t n = 1; n <= 480; n += 95) {
    auto tester = VUnOpMicrokernelTester();
    tester.batch_size(n);
    tester.inplace(true);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
9941
// Runs the ELU tester with prescale 0.1 and then 10.0, each over batch sizes
// 1, 96, 191, ... (step 95) that do not exceed 480.
TEST(F32_VELU__AVX512F_RR1_P6_X96, prescale) {
  TEST_REQUIRES_X86_AVX512F;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx512f_rr1_p6_x96);
  const float prescales[] = {0.1f, 10.0f};
  for (const float value : prescales) {
    for (size_t n = 1; n <= 480; n += 95) {
      auto tester = VUnOpMicrokernelTester();
      tester.batch_size(n);
      tester.prescale(value);
      tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
9953
// Runs the ELU tester with alpha 0.3 and then 3.0, each over batch sizes
// 1, 96, 191, ... (step 95) that do not exceed 480.
TEST(F32_VELU__AVX512F_RR1_P6_X96, alpha) {
  TEST_REQUIRES_X86_AVX512F;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx512f_rr1_p6_x96);
  const float alphas[] = {0.3f, 3.0f};
  for (const float value : alphas) {
    for (size_t n = 1; n <= 480; n += 95) {
      auto tester = VUnOpMicrokernelTester();
      tester.batch_size(n);
      tester.alpha(value);
      tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
9965
// Runs the ELU tester with beta 0.3 and then 3.0, each over batch sizes
// 1, 96, 191, ... (step 95) that do not exceed 480.
TEST(F32_VELU__AVX512F_RR1_P6_X96, beta) {
  TEST_REQUIRES_X86_AVX512F;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx512f_rr1_p6_x96);
  const float betas[] = {0.3f, 3.0f};
  for (const float value : betas) {
    for (size_t n = 1; n <= 480; n += 95) {
      auto tester = VUnOpMicrokernelTester();
      tester.batch_size(n);
      tester.beta(value);
      tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
9977 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
9978
9979
9980 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the ELU tester once with a batch of exactly 112 elements.
TEST(F32_VELU__AVX512F_RR1_P6_X112, batch_eq_112) {
  TEST_REQUIRES_X86_AVX512F;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx512f_rr1_p6_x112);
  auto tester = VUnOpMicrokernelTester();
  tester.batch_size(112);
  tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
}
9987
// Runs the ELU tester on batch sizes 224 through 1008, stepping by 112.
TEST(F32_VELU__AVX512F_RR1_P6_X112, batch_div_112) {
  TEST_REQUIRES_X86_AVX512F;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx512f_rr1_p6_x112);
  for (size_t multiple = 2; multiple < 10; ++multiple) {
    auto tester = VUnOpMicrokernelTester();
    tester.batch_size(multiple * 112);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
9996
// Runs the ELU tester for every batch size from 1 to 111.
TEST(F32_VELU__AVX512F_RR1_P6_X112, batch_lt_112) {
  TEST_REQUIRES_X86_AVX512F;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx512f_rr1_p6_x112);
  for (size_t n = 1; n < 112; ++n) {
    auto tester = VUnOpMicrokernelTester();
    tester.batch_size(n);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
10005
// Runs the ELU tester for every batch size from 113 to 223.
TEST(F32_VELU__AVX512F_RR1_P6_X112, batch_gt_112) {
  TEST_REQUIRES_X86_AVX512F;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx512f_rr1_p6_x112);
  for (size_t n = 113; n < 224; ++n) {
    auto tester = VUnOpMicrokernelTester();
    tester.batch_size(n);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
10014
// Runs the ELU tester with inplace(true) on batch sizes 1, 112, 223, ...
// (step 111) that do not exceed 560.
TEST(F32_VELU__AVX512F_RR1_P6_X112, inplace) {
  TEST_REQUIRES_X86_AVX512F;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx512f_rr1_p6_x112);
  for (size_t n = 1; n <= 560; n += 111) {
    auto tester = VUnOpMicrokernelTester();
    tester.batch_size(n);
    tester.inplace(true);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
10024
// Runs the ELU tester with prescale 0.1 and then 10.0, each over batch sizes
// 1, 112, 223, ... (step 111) that do not exceed 560.
TEST(F32_VELU__AVX512F_RR1_P6_X112, prescale) {
  TEST_REQUIRES_X86_AVX512F;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx512f_rr1_p6_x112);
  const float prescales[] = {0.1f, 10.0f};
  for (const float value : prescales) {
    for (size_t n = 1; n <= 560; n += 111) {
      auto tester = VUnOpMicrokernelTester();
      tester.batch_size(n);
      tester.prescale(value);
      tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
10036
// Runs the ELU tester with alpha 0.3 and then 3.0, each over batch sizes
// 1, 112, 223, ... (step 111) that do not exceed 560.
TEST(F32_VELU__AVX512F_RR1_P6_X112, alpha) {
  TEST_REQUIRES_X86_AVX512F;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx512f_rr1_p6_x112);
  const float alphas[] = {0.3f, 3.0f};
  for (const float value : alphas) {
    for (size_t n = 1; n <= 560; n += 111) {
      auto tester = VUnOpMicrokernelTester();
      tester.batch_size(n);
      tester.alpha(value);
      tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
10048
// Runs the ELU tester with beta 0.3 and then 3.0, each over batch sizes
// 1, 112, 223, ... (step 111) that do not exceed 560.
TEST(F32_VELU__AVX512F_RR1_P6_X112, beta) {
  TEST_REQUIRES_X86_AVX512F;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx512f_rr1_p6_x112);
  const float betas[] = {0.3f, 3.0f};
  for (const float value : betas) {
    for (size_t n = 1; n <= 560; n += 111) {
      auto tester = VUnOpMicrokernelTester();
      tester.batch_size(n);
      tester.beta(value);
      tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
10060 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
10061
10062
10063 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
// Runs the ELU tester once with a batch of exactly 128 elements.
TEST(F32_VELU__AVX512F_RR1_P6_X128, batch_eq_128) {
  TEST_REQUIRES_X86_AVX512F;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx512f_rr1_p6_x128);
  auto tester = VUnOpMicrokernelTester();
  tester.batch_size(128);
  tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
}
10070
// Runs the ELU tester on batch sizes 256 through 1152, stepping by 128.
TEST(F32_VELU__AVX512F_RR1_P6_X128, batch_div_128) {
  TEST_REQUIRES_X86_AVX512F;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx512f_rr1_p6_x128);
  for (size_t multiple = 2; multiple < 10; ++multiple) {
    auto tester = VUnOpMicrokernelTester();
    tester.batch_size(multiple * 128);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
10079
// Runs the ELU tester for every batch size from 1 to 127.
TEST(F32_VELU__AVX512F_RR1_P6_X128, batch_lt_128) {
  TEST_REQUIRES_X86_AVX512F;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx512f_rr1_p6_x128);
  for (size_t n = 1; n < 128; ++n) {
    auto tester = VUnOpMicrokernelTester();
    tester.batch_size(n);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
10088
// Runs the ELU tester for every batch size from 129 to 255.
TEST(F32_VELU__AVX512F_RR1_P6_X128, batch_gt_128) {
  TEST_REQUIRES_X86_AVX512F;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx512f_rr1_p6_x128);
  for (size_t n = 129; n < 256; ++n) {
    auto tester = VUnOpMicrokernelTester();
    tester.batch_size(n);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
10097
// Runs the ELU tester with inplace(true) on batch sizes 1, 128, 255, ...
// (step 127) that do not exceed 640.
TEST(F32_VELU__AVX512F_RR1_P6_X128, inplace) {
  TEST_REQUIRES_X86_AVX512F;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx512f_rr1_p6_x128);
  for (size_t n = 1; n <= 640; n += 127) {
    auto tester = VUnOpMicrokernelTester();
    tester.batch_size(n);
    tester.inplace(true);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
10107
// Runs the ELU tester with prescale 0.1 and then 10.0, each over batch sizes
// 1, 128, 255, ... (step 127) that do not exceed 640.
TEST(F32_VELU__AVX512F_RR1_P6_X128, prescale) {
  TEST_REQUIRES_X86_AVX512F;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx512f_rr1_p6_x128);
  const float prescales[] = {0.1f, 10.0f};
  for (const float value : prescales) {
    for (size_t n = 1; n <= 640; n += 127) {
      auto tester = VUnOpMicrokernelTester();
      tester.batch_size(n);
      tester.prescale(value);
      tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
10119
// Runs the ELU tester with alpha 0.3 and then 3.0, each over batch sizes
// 1, 128, 255, ... (step 127) that do not exceed 640.
TEST(F32_VELU__AVX512F_RR1_P6_X128, alpha) {
  TEST_REQUIRES_X86_AVX512F;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx512f_rr1_p6_x128);
  const float alphas[] = {0.3f, 3.0f};
  for (const float value : alphas) {
    for (size_t n = 1; n <= 640; n += 127) {
      auto tester = VUnOpMicrokernelTester();
      tester.batch_size(n);
      tester.alpha(value);
      tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
10131
// Runs the ELU tester with beta 0.3 and then 3.0, each over batch sizes
// 1, 128, 255, ... (step 127) that do not exceed 640.
TEST(F32_VELU__AVX512F_RR1_P6_X128, beta) {
  TEST_REQUIRES_X86_AVX512F;
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__avx512f_rr1_p6_x128);
  const float betas[] = {0.3f, 3.0f};
  for (const float value : betas) {
    for (size_t n = 1; n <= 640; n += 127) {
      auto tester = VUnOpMicrokernelTester();
      tester.batch_size(n);
      tester.beta(value);
      tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
10143 #endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
10144
10145
10146 #if XNN_ARCH_WASMSIMD
// Runs the ELU tester once with a batch of exactly 4 elements.
TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X4, batch_eq_4) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x4);
  auto tester = VUnOpMicrokernelTester();
  tester.batch_size(4);
  tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
}
10152
// Runs the ELU tester on batch sizes 8 through 36, stepping by 4.
TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X4, batch_div_4) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x4);
  for (size_t multiple = 2; multiple < 10; ++multiple) {
    auto tester = VUnOpMicrokernelTester();
    tester.batch_size(multiple * 4);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
10160
// Runs the ELU tester for every batch size from 1 to 3.
TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X4, batch_lt_4) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x4);
  for (size_t n = 1; n < 4; ++n) {
    auto tester = VUnOpMicrokernelTester();
    tester.batch_size(n);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
10168
// Runs the ELU tester for every batch size from 5 to 7.
TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X4, batch_gt_4) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x4);
  for (size_t n = 5; n < 8; ++n) {
    auto tester = VUnOpMicrokernelTester();
    tester.batch_size(n);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
10176
// Runs the ELU tester with inplace(true) on batch sizes 1, 4, 7, ...
// (step 3) that do not exceed 20.
TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X4, inplace) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x4);
  for (size_t n = 1; n <= 20; n += 3) {
    auto tester = VUnOpMicrokernelTester();
    tester.batch_size(n);
    tester.inplace(true);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
10185
// Runs the ELU tester with prescale 0.1 and then 10.0, each over batch sizes
// 1, 4, 7, ... (step 3) that do not exceed 20.
TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X4, prescale) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x4);
  const float prescales[] = {0.1f, 10.0f};
  for (const float value : prescales) {
    for (size_t n = 1; n <= 20; n += 3) {
      auto tester = VUnOpMicrokernelTester();
      tester.batch_size(n);
      tester.prescale(value);
      tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
10196
// Runs the ELU tester with alpha 0.3 and then 3.0, each over batch sizes
// 1, 4, 7, ... (step 3) that do not exceed 20.
TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X4, alpha) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x4);
  const float alphas[] = {0.3f, 3.0f};
  for (const float value : alphas) {
    for (size_t n = 1; n <= 20; n += 3) {
      auto tester = VUnOpMicrokernelTester();
      tester.batch_size(n);
      tester.alpha(value);
      tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
10207
// Runs the ELU tester with beta 0.3 and then 3.0, each over batch sizes
// 1, 4, 7, ... (step 3) that do not exceed 20.
TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X4, beta) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x4);
  const float betas[] = {0.3f, 3.0f};
  for (const float value : betas) {
    for (size_t n = 1; n <= 20; n += 3) {
      auto tester = VUnOpMicrokernelTester();
      tester.batch_size(n);
      tester.beta(value);
      tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
10218 #endif // XNN_ARCH_WASMSIMD
10219
10220
10221 #if XNN_ARCH_WASMSIMD
// Runs the ELU tester once with a batch of exactly 8 elements.
TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X8, batch_eq_8) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x8);
  auto tester = VUnOpMicrokernelTester();
  tester.batch_size(8);
  tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
}
10227
// Runs the ELU tester on batch sizes 16 through 72, stepping by 8.
TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X8, batch_div_8) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x8);
  for (size_t multiple = 2; multiple < 10; ++multiple) {
    auto tester = VUnOpMicrokernelTester();
    tester.batch_size(multiple * 8);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
10235
// Runs the ELU tester for every batch size from 1 to 7.
TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X8, batch_lt_8) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x8);
  for (size_t n = 1; n < 8; ++n) {
    auto tester = VUnOpMicrokernelTester();
    tester.batch_size(n);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
10243
// Runs the ELU tester for every batch size from 9 to 15.
TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X8, batch_gt_8) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x8);
  for (size_t n = 9; n < 16; ++n) {
    auto tester = VUnOpMicrokernelTester();
    tester.batch_size(n);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
10251
// Runs the ELU tester with inplace(true) on batch sizes 1, 8, 15, ...
// (step 7) that do not exceed 40.
TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X8, inplace) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x8);
  for (size_t n = 1; n <= 40; n += 7) {
    auto tester = VUnOpMicrokernelTester();
    tester.batch_size(n);
    tester.inplace(true);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
10260
// Runs the ELU tester with prescale 0.1 and then 10.0, each over batch sizes
// 1, 8, 15, ... (step 7) that do not exceed 40.
TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X8, prescale) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x8);
  const float prescales[] = {0.1f, 10.0f};
  for (const float value : prescales) {
    for (size_t n = 1; n <= 40; n += 7) {
      auto tester = VUnOpMicrokernelTester();
      tester.batch_size(n);
      tester.prescale(value);
      tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
10271
// Runs the ELU tester with alpha 0.3 and then 3.0, each over batch sizes
// 1, 8, 15, ... (step 7) that do not exceed 40.
TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X8, alpha) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x8);
  const float alphas[] = {0.3f, 3.0f};
  for (const float value : alphas) {
    for (size_t n = 1; n <= 40; n += 7) {
      auto tester = VUnOpMicrokernelTester();
      tester.batch_size(n);
      tester.alpha(value);
      tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
10282
// Runs the ELU tester with beta 0.3 and then 3.0, each over batch sizes
// 1, 8, 15, ... (step 7) that do not exceed 40.
TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X8, beta) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x8);
  const float betas[] = {0.3f, 3.0f};
  for (const float value : betas) {
    for (size_t n = 1; n <= 40; n += 7) {
      auto tester = VUnOpMicrokernelTester();
      tester.batch_size(n);
      tester.beta(value);
      tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
10293 #endif // XNN_ARCH_WASMSIMD
10294
10295
10296 #if XNN_ARCH_WASMSIMD
// Runs the ELU tester once with a batch of exactly 12 elements.
TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X12, batch_eq_12) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x12);
  auto tester = VUnOpMicrokernelTester();
  tester.batch_size(12);
  tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
}
10302
// Runs the ELU tester on batch sizes 24 through 108, stepping by 12.
TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X12, batch_div_12) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x12);
  for (size_t multiple = 2; multiple < 10; ++multiple) {
    auto tester = VUnOpMicrokernelTester();
    tester.batch_size(multiple * 12);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
10310
// Runs the ELU tester for every batch size from 1 to 11.
TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X12, batch_lt_12) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x12);
  for (size_t n = 1; n < 12; ++n) {
    auto tester = VUnOpMicrokernelTester();
    tester.batch_size(n);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
10318
// Runs the ELU tester for every batch size from 13 to 23.
TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X12, batch_gt_12) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x12);
  for (size_t n = 13; n < 24; ++n) {
    auto tester = VUnOpMicrokernelTester();
    tester.batch_size(n);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
10326
// Runs the ELU tester with inplace(true) on batch sizes 1, 12, 23, ...
// (step 11) that do not exceed 60.
TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X12, inplace) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x12);
  for (size_t n = 1; n <= 60; n += 11) {
    auto tester = VUnOpMicrokernelTester();
    tester.batch_size(n);
    tester.inplace(true);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
10335
// Runs the ELU tester with prescale 0.1 and then 10.0, each over batch sizes
// 1, 12, 23, ... (step 11) that do not exceed 60.
TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X12, prescale) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x12);
  const float prescales[] = {0.1f, 10.0f};
  for (const float value : prescales) {
    for (size_t n = 1; n <= 60; n += 11) {
      auto tester = VUnOpMicrokernelTester();
      tester.batch_size(n);
      tester.prescale(value);
      tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
10346
// Runs the ELU tester with alpha 0.3 and then 3.0, each over batch sizes
// 1, 12, 23, ... (step 11) that do not exceed 60.
TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X12, alpha) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x12);
  const float alphas[] = {0.3f, 3.0f};
  for (const float value : alphas) {
    for (size_t n = 1; n <= 60; n += 11) {
      auto tester = VUnOpMicrokernelTester();
      tester.batch_size(n);
      tester.alpha(value);
      tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
10357
// Runs the ELU tester with beta 0.3 and then 3.0, each over batch sizes
// 1, 12, 23, ... (step 11) that do not exceed 60.
TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X12, beta) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x12);
  const float betas[] = {0.3f, 3.0f};
  for (const float value : betas) {
    for (size_t n = 1; n <= 60; n += 11) {
      auto tester = VUnOpMicrokernelTester();
      tester.batch_size(n);
      tester.beta(value);
      tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
10368 #endif // XNN_ARCH_WASMSIMD
10369
10370
10371 #if XNN_ARCH_WASMSIMD
// Runs the ELU tester once with a batch of exactly 16 elements.
TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X16, batch_eq_16) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x16);
  auto tester = VUnOpMicrokernelTester();
  tester.batch_size(16);
  tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
}
10377
// Runs the ELU tester on batch sizes 32 through 144, stepping by 16.
TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X16, batch_div_16) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x16);
  for (size_t multiple = 2; multiple < 10; ++multiple) {
    auto tester = VUnOpMicrokernelTester();
    tester.batch_size(multiple * 16);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
10385
// Runs the ELU tester for every batch size from 1 to 15.
TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X16, batch_lt_16) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x16);
  for (size_t n = 1; n < 16; ++n) {
    auto tester = VUnOpMicrokernelTester();
    tester.batch_size(n);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
10393
// Runs the ELU tester for every batch size from 17 to 31.
TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X16, batch_gt_16) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x16);
  for (size_t n = 17; n < 32; ++n) {
    auto tester = VUnOpMicrokernelTester();
    tester.batch_size(n);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
10401
// Runs the ELU tester with inplace(true) on batch sizes 1, 16, 31, ...
// (step 15) that do not exceed 80.
TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X16, inplace) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x16);
  for (size_t n = 1; n <= 80; n += 15) {
    auto tester = VUnOpMicrokernelTester();
    tester.batch_size(n);
    tester.inplace(true);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
10410
// Runs the ELU tester with prescale 0.1 and then 10.0, each over batch sizes
// 1, 16, 31, ... (step 15) that do not exceed 80.
TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X16, prescale) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x16);
  const float prescales[] = {0.1f, 10.0f};
  for (const float value : prescales) {
    for (size_t n = 1; n <= 80; n += 15) {
      auto tester = VUnOpMicrokernelTester();
      tester.batch_size(n);
      tester.prescale(value);
      tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
10421
// Runs the ELU tester with alpha 0.3 and then 3.0, each over batch sizes
// 1, 16, 31, ... (step 15) that do not exceed 80.
TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X16, alpha) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x16);
  const float alphas[] = {0.3f, 3.0f};
  for (const float value : alphas) {
    for (size_t n = 1; n <= 80; n += 15) {
      auto tester = VUnOpMicrokernelTester();
      tester.batch_size(n);
      tester.alpha(value);
      tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
10432
// Runs the ELU tester with beta 0.3 and then 3.0, each over batch sizes
// 1, 16, 31, ... (step 15) that do not exceed 80.
TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X16, beta) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x16);
  const float betas[] = {0.3f, 3.0f};
  for (const float value : betas) {
    for (size_t n = 1; n <= 80; n += 15) {
      auto tester = VUnOpMicrokernelTester();
      tester.batch_size(n);
      tester.beta(value);
      tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
10443 #endif // XNN_ARCH_WASMSIMD
10444
10445
10446 #if XNN_ARCH_WASMSIMD
// Runs the ELU tester once with a batch of exactly 20 elements.
TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X20, batch_eq_20) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x20);
  auto tester = VUnOpMicrokernelTester();
  tester.batch_size(20);
  tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
}
10452
// Runs the ELU tester on batch sizes 40 through 180, stepping by 20.
TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X20, batch_div_20) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x20);
  for (size_t multiple = 2; multiple < 10; ++multiple) {
    auto tester = VUnOpMicrokernelTester();
    tester.batch_size(multiple * 20);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
10460
// Runs the ELU tester for every batch size from 1 to 19.
TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X20, batch_lt_20) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x20);
  for (size_t n = 1; n < 20; ++n) {
    auto tester = VUnOpMicrokernelTester();
    tester.batch_size(n);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
10468
// Runs the ELU tester for every batch size from 21 to 39.
TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X20, batch_gt_20) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x20);
  for (size_t n = 21; n < 40; ++n) {
    auto tester = VUnOpMicrokernelTester();
    tester.batch_size(n);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
10476
// Runs the ELU tester with inplace(true) on batch sizes 1, 20, 39, ...
// (step 19) that do not exceed 100.
TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X20, inplace) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x20);
  for (size_t n = 1; n <= 100; n += 19) {
    auto tester = VUnOpMicrokernelTester();
    tester.batch_size(n);
    tester.inplace(true);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
10485
// Runs the ELU tester with prescale 0.1 and then 10.0, each over batch sizes
// 1, 20, 39, ... (step 19) that do not exceed 100.
TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X20, prescale) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x20);
  const float prescales[] = {0.1f, 10.0f};
  for (const float value : prescales) {
    for (size_t n = 1; n <= 100; n += 19) {
      auto tester = VUnOpMicrokernelTester();
      tester.batch_size(n);
      tester.prescale(value);
      tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
10496
// For alpha values 0.3f and 3.0f, sweeps batch sizes from 1 up to 100 in steps of 19.
TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X20, alpha) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x20);
  const std::vector<float> alpha_values{0.3f, 3.0f};
  for (const float alpha : alpha_values) {
    for (size_t n = 1; n <= 100; n += 19) {
      VUnOpMicrokernelTester().batch_size(n).alpha(alpha)
        .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
10507
// For beta values 0.3f and 3.0f, sweeps batch sizes from 1 up to 100 in steps of 19.
TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X20, beta) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x20);
  const std::vector<float> beta_values{0.3f, 3.0f};
  for (const float beta : beta_values) {
    for (size_t n = 1; n <= 100; n += 19) {
      VUnOpMicrokernelTester().batch_size(n).beta(beta)
        .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
10518 #endif // XNN_ARCH_WASMSIMD
10519
10520
10521 #if XNN_ARCH_WASMSIMD
// Runs the kernel once with batch_size set to 24 and the ELU op type.
TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X24, batch_eq_24) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x24);
  VUnOpMicrokernelTester().batch_size(24).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
}
10527
// Sweeps batch sizes 48, 72, ..., 216 (multiples of 24) with the ELU op type.
TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X24, batch_div_24) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x24);
  for (size_t n = 48; n < 240; n += 24) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
10535
// Sweeps every batch size from 1 through 23 with the ELU op type.
TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X24, batch_lt_24) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x24);
  for (size_t n = 1; n < 24; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
10543
// Sweeps every batch size from 25 through 47 with the ELU op type.
TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X24, batch_gt_24) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x24);
  for (size_t n = 25; n < 48; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
10551
// Sweeps batch sizes from 1 up to 120 in steps of 23 with inplace(true) set on the tester.
TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X24, inplace) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x24);
  for (size_t n = 1; n <= 120; n += 23) {
    VUnOpMicrokernelTester().batch_size(n).inplace(true)
      .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
10560
// For prescale values 0.1f and 10.0f, sweeps batch sizes from 1 up to 120 in steps of 23.
TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X24, prescale) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x24);
  const std::vector<float> prescale_values{0.1f, 10.0f};
  for (const float prescale : prescale_values) {
    for (size_t n = 1; n <= 120; n += 23) {
      VUnOpMicrokernelTester().batch_size(n).prescale(prescale)
        .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
10571
// For alpha values 0.3f and 3.0f, sweeps batch sizes from 1 up to 120 in steps of 23.
TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X24, alpha) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x24);
  const std::vector<float> alpha_values{0.3f, 3.0f};
  for (const float alpha : alpha_values) {
    for (size_t n = 1; n <= 120; n += 23) {
      VUnOpMicrokernelTester().batch_size(n).alpha(alpha)
        .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
10582
// For beta values 0.3f and 3.0f, sweeps batch sizes from 1 up to 120 in steps of 23.
TEST(F32_VELU__WASMSIMD_ARM_RR2_LUT16_P3_X24, beta) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x24);
  const std::vector<float> beta_values{0.3f, 3.0f};
  for (const float beta : beta_values) {
    for (size_t n = 1; n <= 120; n += 23) {
      VUnOpMicrokernelTester().batch_size(n).beta(beta)
        .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
10593 #endif // XNN_ARCH_WASMSIMD
10594
10595
10596 #if XNN_ARCH_WASMSIMD
// Runs the kernel once with batch_size set to 4 and the ELU op type.
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X4, batch_eq_4) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x4);
  VUnOpMicrokernelTester().batch_size(4).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
}
10602
// Sweeps batch sizes 8, 12, ..., 36 (multiples of 4) with the ELU op type.
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X4, batch_div_4) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x4);
  for (size_t n = 8; n < 40; n += 4) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
10610
// Sweeps every batch size from 1 through 3 with the ELU op type.
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X4, batch_lt_4) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x4);
  for (size_t n = 1; n < 4; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
10618
// Sweeps every batch size from 5 through 7 with the ELU op type.
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X4, batch_gt_4) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x4);
  for (size_t n = 5; n < 8; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
10626
// Sweeps batch sizes from 1 up to 20 in steps of 3 with inplace(true) set on the tester.
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X4, inplace) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x4);
  for (size_t n = 1; n <= 20; n += 3) {
    VUnOpMicrokernelTester().batch_size(n).inplace(true)
      .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
10635
// For prescale values 0.1f and 10.0f, sweeps batch sizes from 1 up to 20 in steps of 3.
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X4, prescale) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x4);
  const std::vector<float> prescale_values{0.1f, 10.0f};
  for (const float prescale : prescale_values) {
    for (size_t n = 1; n <= 20; n += 3) {
      VUnOpMicrokernelTester().batch_size(n).prescale(prescale)
        .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
10646
// For alpha values 0.3f and 3.0f, sweeps batch sizes from 1 up to 20 in steps of 3.
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X4, alpha) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x4);
  const std::vector<float> alpha_values{0.3f, 3.0f};
  for (const float alpha : alpha_values) {
    for (size_t n = 1; n <= 20; n += 3) {
      VUnOpMicrokernelTester().batch_size(n).alpha(alpha)
        .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
10657
// For beta values 0.3f and 3.0f, sweeps batch sizes from 1 up to 20 in steps of 3.
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X4, beta) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x4);
  const std::vector<float> beta_values{0.3f, 3.0f};
  for (const float beta : beta_values) {
    for (size_t n = 1; n <= 20; n += 3) {
      VUnOpMicrokernelTester().batch_size(n).beta(beta)
        .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
10668 #endif // XNN_ARCH_WASMSIMD
10669
10670
10671 #if XNN_ARCH_WASMSIMD
// Runs the kernel once with batch_size set to 8 and the ELU op type.
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X8, batch_eq_8) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x8);
  VUnOpMicrokernelTester().batch_size(8).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
}
10677
// Sweeps batch sizes 16, 24, ..., 72 (multiples of 8) with the ELU op type.
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X8, batch_div_8) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x8);
  for (size_t n = 16; n < 80; n += 8) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
10685
// Sweeps every batch size from 1 through 7 with the ELU op type.
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X8, batch_lt_8) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x8);
  for (size_t n = 1; n < 8; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
10693
// Sweeps every batch size from 9 through 15 with the ELU op type.
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X8, batch_gt_8) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x8);
  for (size_t n = 9; n < 16; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
10701
// Sweeps batch sizes from 1 up to 40 in steps of 7 with inplace(true) set on the tester.
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X8, inplace) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x8);
  for (size_t n = 1; n <= 40; n += 7) {
    VUnOpMicrokernelTester().batch_size(n).inplace(true)
      .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
10710
// For prescale values 0.1f and 10.0f, sweeps batch sizes from 1 up to 40 in steps of 7.
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X8, prescale) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x8);
  const std::vector<float> prescale_values{0.1f, 10.0f};
  for (const float prescale : prescale_values) {
    for (size_t n = 1; n <= 40; n += 7) {
      VUnOpMicrokernelTester().batch_size(n).prescale(prescale)
        .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
10721
// For alpha values 0.3f and 3.0f, sweeps batch sizes from 1 up to 40 in steps of 7.
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X8, alpha) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x8);
  const std::vector<float> alpha_values{0.3f, 3.0f};
  for (const float alpha : alpha_values) {
    for (size_t n = 1; n <= 40; n += 7) {
      VUnOpMicrokernelTester().batch_size(n).alpha(alpha)
        .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
10732
// For beta values 0.3f and 3.0f, sweeps batch sizes from 1 up to 40 in steps of 7.
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X8, beta) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x8);
  const std::vector<float> beta_values{0.3f, 3.0f};
  for (const float beta : beta_values) {
    for (size_t n = 1; n <= 40; n += 7) {
      VUnOpMicrokernelTester().batch_size(n).beta(beta)
        .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
10743 #endif // XNN_ARCH_WASMSIMD
10744
10745
10746 #if XNN_ARCH_WASMSIMD
// Runs the kernel once with batch_size set to 12 and the ELU op type.
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X12, batch_eq_12) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x12);
  VUnOpMicrokernelTester().batch_size(12).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
}
10752
// Sweeps batch sizes 24, 36, ..., 108 (multiples of 12) with the ELU op type.
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X12, batch_div_12) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x12);
  for (size_t n = 24; n < 120; n += 12) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
10760
// Sweeps every batch size from 1 through 11 with the ELU op type.
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X12, batch_lt_12) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x12);
  for (size_t n = 1; n < 12; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
10768
// Sweeps every batch size from 13 through 23 with the ELU op type.
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X12, batch_gt_12) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x12);
  for (size_t n = 13; n < 24; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
10776
// Sweeps batch sizes from 1 up to 60 in steps of 11 with inplace(true) set on the tester.
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X12, inplace) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x12);
  for (size_t n = 1; n <= 60; n += 11) {
    VUnOpMicrokernelTester().batch_size(n).inplace(true)
      .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
10785
// For prescale values 0.1f and 10.0f, sweeps batch sizes from 1 up to 60 in steps of 11.
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X12, prescale) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x12);
  const std::vector<float> prescale_values{0.1f, 10.0f};
  for (const float prescale : prescale_values) {
    for (size_t n = 1; n <= 60; n += 11) {
      VUnOpMicrokernelTester().batch_size(n).prescale(prescale)
        .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
10796
// For alpha values 0.3f and 3.0f, sweeps batch sizes from 1 up to 60 in steps of 11.
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X12, alpha) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x12);
  const std::vector<float> alpha_values{0.3f, 3.0f};
  for (const float alpha : alpha_values) {
    for (size_t n = 1; n <= 60; n += 11) {
      VUnOpMicrokernelTester().batch_size(n).alpha(alpha)
        .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
10807
// For beta values 0.3f and 3.0f, sweeps batch sizes from 1 up to 60 in steps of 11.
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X12, beta) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x12);
  const std::vector<float> beta_values{0.3f, 3.0f};
  for (const float beta : beta_values) {
    for (size_t n = 1; n <= 60; n += 11) {
      VUnOpMicrokernelTester().batch_size(n).beta(beta)
        .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
10818 #endif // XNN_ARCH_WASMSIMD
10819
10820
10821 #if XNN_ARCH_WASMSIMD
// Runs the kernel once with batch_size set to 16 and the ELU op type.
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X16, batch_eq_16) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x16);
  VUnOpMicrokernelTester().batch_size(16).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
}
10827
// Sweeps batch sizes 32, 48, ..., 144 (multiples of 16) with the ELU op type.
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X16, batch_div_16) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x16);
  for (size_t n = 32; n < 160; n += 16) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
10835
// Sweeps every batch size from 1 through 15 with the ELU op type.
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X16, batch_lt_16) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x16);
  for (size_t n = 1; n < 16; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
10843
// Sweeps every batch size from 17 through 31 with the ELU op type.
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X16, batch_gt_16) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x16);
  for (size_t n = 17; n < 32; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
10851
// Sweeps batch sizes from 1 up to 80 in steps of 15 with inplace(true) set on the tester.
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X16, inplace) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x16);
  for (size_t n = 1; n <= 80; n += 15) {
    VUnOpMicrokernelTester().batch_size(n).inplace(true)
      .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
10860
// For prescale values 0.1f and 10.0f, sweeps batch sizes from 1 up to 80 in steps of 15.
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X16, prescale) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x16);
  const std::vector<float> prescale_values{0.1f, 10.0f};
  for (const float prescale : prescale_values) {
    for (size_t n = 1; n <= 80; n += 15) {
      VUnOpMicrokernelTester().batch_size(n).prescale(prescale)
        .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
10871
// For alpha values 0.3f and 3.0f, sweeps batch sizes from 1 up to 80 in steps of 15.
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X16, alpha) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x16);
  const std::vector<float> alpha_values{0.3f, 3.0f};
  for (const float alpha : alpha_values) {
    for (size_t n = 1; n <= 80; n += 15) {
      VUnOpMicrokernelTester().batch_size(n).alpha(alpha)
        .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
10882
// For beta values 0.3f and 3.0f, sweeps batch sizes from 1 up to 80 in steps of 15.
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X16, beta) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x16);
  const std::vector<float> beta_values{0.3f, 3.0f};
  for (const float beta : beta_values) {
    for (size_t n = 1; n <= 80; n += 15) {
      VUnOpMicrokernelTester().batch_size(n).beta(beta)
        .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
10893 #endif // XNN_ARCH_WASMSIMD
10894
10895
10896 #if XNN_ARCH_WASMSIMD
// Runs the kernel once with batch_size set to 20 and the ELU op type.
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X20, batch_eq_20) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x20);
  VUnOpMicrokernelTester().batch_size(20).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
}
10902
// Sweeps batch sizes 40, 60, ..., 180 (multiples of 20) with the ELU op type.
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X20, batch_div_20) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x20);
  for (size_t n = 40; n < 200; n += 20) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
10910
// Sweeps every batch size from 1 through 19 with the ELU op type.
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X20, batch_lt_20) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x20);
  for (size_t n = 1; n < 20; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
10918
// Sweeps every batch size from 21 through 39 with the ELU op type.
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X20, batch_gt_20) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x20);
  for (size_t n = 21; n < 40; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
10926
// Sweeps batch sizes from 1 up to 100 in steps of 19 with inplace(true) set on the tester.
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X20, inplace) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x20);
  for (size_t n = 1; n <= 100; n += 19) {
    VUnOpMicrokernelTester().batch_size(n).inplace(true)
      .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
10935
// For prescale values 0.1f and 10.0f, sweeps batch sizes from 1 up to 100 in steps of 19.
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X20, prescale) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x20);
  const std::vector<float> prescale_values{0.1f, 10.0f};
  for (const float prescale : prescale_values) {
    for (size_t n = 1; n <= 100; n += 19) {
      VUnOpMicrokernelTester().batch_size(n).prescale(prescale)
        .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
10946
// For alpha values 0.3f and 3.0f, sweeps batch sizes from 1 up to 100 in steps of 19.
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X20, alpha) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x20);
  const std::vector<float> alpha_values{0.3f, 3.0f};
  for (const float alpha : alpha_values) {
    for (size_t n = 1; n <= 100; n += 19) {
      VUnOpMicrokernelTester().batch_size(n).alpha(alpha)
        .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
10957
// For beta values 0.3f and 3.0f, sweeps batch sizes from 1 up to 100 in steps of 19.
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X20, beta) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x20);
  const std::vector<float> beta_values{0.3f, 3.0f};
  for (const float beta : beta_values) {
    for (size_t n = 1; n <= 100; n += 19) {
      VUnOpMicrokernelTester().batch_size(n).beta(beta)
        .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
10968 #endif // XNN_ARCH_WASMSIMD
10969
10970
10971 #if XNN_ARCH_WASMSIMD
// Runs the kernel once with batch_size set to 24 and the ELU op type.
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X24, batch_eq_24) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x24);
  VUnOpMicrokernelTester().batch_size(24).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
}
10977
// Sweeps batch sizes 48, 72, ..., 216 (multiples of 24) with the ELU op type.
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X24, batch_div_24) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x24);
  for (size_t n = 48; n < 240; n += 24) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
10985
// Sweeps every batch size from 1 through 23 with the ELU op type.
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X24, batch_lt_24) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x24);
  for (size_t n = 1; n < 24; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
10993
// Sweeps every batch size from 25 through 47 with the ELU op type.
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X24, batch_gt_24) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x24);
  for (size_t n = 25; n < 48; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
11001
// Sweeps batch sizes from 1 up to 120 in steps of 23 with inplace(true) set on the tester.
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X24, inplace) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x24);
  for (size_t n = 1; n <= 120; n += 23) {
    VUnOpMicrokernelTester().batch_size(n).inplace(true)
      .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
11010
// For prescale values 0.1f and 10.0f, sweeps batch sizes from 1 up to 120 in steps of 23.
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X24, prescale) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x24);
  const std::vector<float> prescale_values{0.1f, 10.0f};
  for (const float prescale : prescale_values) {
    for (size_t n = 1; n <= 120; n += 23) {
      VUnOpMicrokernelTester().batch_size(n).prescale(prescale)
        .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
11021
// For alpha values 0.3f and 3.0f, sweeps batch sizes from 1 up to 120 in steps of 23.
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X24, alpha) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x24);
  const std::vector<float> alpha_values{0.3f, 3.0f};
  for (const float alpha : alpha_values) {
    for (size_t n = 1; n <= 120; n += 23) {
      VUnOpMicrokernelTester().batch_size(n).alpha(alpha)
        .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
11032
// For beta values 0.3f and 3.0f, sweeps batch sizes from 1 up to 120 in steps of 23.
TEST(F32_VELU__WASMSIMD_X86_RR2_LUT16_P3_X24, beta) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x24);
  const std::vector<float> beta_values{0.3f, 3.0f};
  for (const float beta : beta_values) {
    for (size_t n = 1; n <= 120; n += 23) {
      VUnOpMicrokernelTester().batch_size(n).beta(beta)
        .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
11043 #endif // XNN_ARCH_WASMSIMD
11044
11045
11046 #if XNN_ARCH_WASMSIMD
// Runs the kernel once with batch_size set to 4 and the ELU op type.
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X4, batch_eq_4) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x4);
  VUnOpMicrokernelTester().batch_size(4).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
}
11052
// Sweeps batch sizes 8, 12, ..., 36 (multiples of 4) with the ELU op type.
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X4, batch_div_4) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x4);
  for (size_t n = 8; n < 40; n += 4) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
11060
// Sweeps every batch size from 1 through 3 with the ELU op type.
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X4, batch_lt_4) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x4);
  for (size_t n = 1; n < 4; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
11068
// Sweeps every batch size from 5 through 7 with the ELU op type.
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X4, batch_gt_4) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x4);
  for (size_t n = 5; n < 8; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
11076
// Sweeps batch sizes from 1 up to 20 in steps of 3 with inplace(true) set on the tester.
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X4, inplace) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x4);
  for (size_t n = 1; n <= 20; n += 3) {
    VUnOpMicrokernelTester().batch_size(n).inplace(true)
      .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
11085
// For prescale values 0.1f and 10.0f, sweeps batch sizes from 1 up to 20 in steps of 3.
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X4, prescale) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x4);
  const std::vector<float> prescale_values{0.1f, 10.0f};
  for (const float prescale : prescale_values) {
    for (size_t n = 1; n <= 20; n += 3) {
      VUnOpMicrokernelTester().batch_size(n).prescale(prescale)
        .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
11096
// For alpha values 0.3f and 3.0f, sweeps batch sizes from 1 up to 20 in steps of 3.
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X4, alpha) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x4);
  const std::vector<float> alpha_values{0.3f, 3.0f};
  for (const float alpha : alpha_values) {
    for (size_t n = 1; n <= 20; n += 3) {
      VUnOpMicrokernelTester().batch_size(n).alpha(alpha)
        .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
11107
// For beta values 0.3f and 3.0f, sweeps batch sizes from 1 up to 20 in steps of 3.
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X4, beta) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x4);
  const std::vector<float> beta_values{0.3f, 3.0f};
  for (const float beta : beta_values) {
    for (size_t n = 1; n <= 20; n += 3) {
      VUnOpMicrokernelTester().batch_size(n).beta(beta)
        .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
11118 #endif // XNN_ARCH_WASMSIMD
11119
11120
11121 #if XNN_ARCH_WASMSIMD
// Runs the kernel once with batch_size set to 8 and the ELU op type.
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X8, batch_eq_8) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x8);
  VUnOpMicrokernelTester().batch_size(8).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
}
11127
// Sweeps batch sizes 16, 24, ..., 72 (multiples of 8) with the ELU op type.
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X8, batch_div_8) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x8);
  for (size_t n = 16; n < 80; n += 8) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
11135
// Sweeps every batch size from 1 through 7 with the ELU op type.
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X8, batch_lt_8) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x8);
  for (size_t n = 1; n < 8; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
11143
// Sweeps every batch size from 9 through 15 with the ELU op type.
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X8, batch_gt_8) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x8);
  for (size_t n = 9; n < 16; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
11151
// Sweeps batch sizes from 1 up to 40 in steps of 7 with inplace(true) set on the tester.
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X8, inplace) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x8);
  for (size_t n = 1; n <= 40; n += 7) {
    VUnOpMicrokernelTester().batch_size(n).inplace(true)
      .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
11160
// For prescale values 0.1f and 10.0f, sweeps batch sizes from 1 up to 40 in steps of 7.
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X8, prescale) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x8);
  const std::vector<float> prescale_values{0.1f, 10.0f};
  for (const float prescale : prescale_values) {
    for (size_t n = 1; n <= 40; n += 7) {
      VUnOpMicrokernelTester().batch_size(n).prescale(prescale)
        .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
11171
// For alpha values 0.3f and 3.0f, sweeps batch sizes from 1 up to 40 in steps of 7.
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X8, alpha) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x8);
  const std::vector<float> alpha_values{0.3f, 3.0f};
  for (const float alpha : alpha_values) {
    for (size_t n = 1; n <= 40; n += 7) {
      VUnOpMicrokernelTester().batch_size(n).alpha(alpha)
        .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
11182
// For each beta setting (0.3 and 3), runs the wasmsimd_arm_rr2_p6_x8 ELU kernel
// on batch sizes 1, 8, 15, ..., 36 (step 7).
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X8, beta) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x8);
  for (const float b : {0.3f, 3.0f}) {
    for (size_t n = 1; n <= 40; n += 7) {
      VUnOpMicrokernelTester().batch_size(n).beta(b).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
11193 #endif // XNN_ARCH_WASMSIMD
11194
11195
11196 #if XNN_ARCH_WASMSIMD
// Runs the wasmsimd_arm_rr2_p6_x12 ELU kernel once on a batch of exactly 12 elements.
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X12, batch_eq_12) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x12);
  VUnOpMicrokernelTester().batch_size(12).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
}
11202
// Runs the wasmsimd_arm_rr2_p6_x12 ELU kernel on batch sizes 24, 36, ..., 108 (multiples of 12).
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X12, batch_div_12) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x12);
  for (size_t n = 24; n < 120; n += 12) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
11210
// Runs the wasmsimd_arm_rr2_p6_x12 ELU kernel on every batch size from 1 through 11.
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X12, batch_lt_12) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x12);
  for (size_t n = 1; n < 12; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
11218
// Runs the wasmsimd_arm_rr2_p6_x12 ELU kernel on every batch size from 13 through 23.
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X12, batch_gt_12) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x12);
  for (size_t n = 13; n < 24; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
11226
// Runs the wasmsimd_arm_rr2_p6_x12 ELU kernel with the tester's in-place mode enabled,
// for batch sizes 1, 12, 23, ..., 56 (step 11).
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X12, inplace) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x12);
  for (size_t n = 1; n <= 60; n += 11) {
    VUnOpMicrokernelTester().batch_size(n).inplace(true).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
11235
// For each prescale setting (0.1 and 10), runs the wasmsimd_arm_rr2_p6_x12 ELU kernel
// on batch sizes 1, 12, 23, ..., 56 (step 11).
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X12, prescale) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x12);
  for (const float scale : {0.1f, 10.0f}) {
    for (size_t n = 1; n <= 60; n += 11) {
      VUnOpMicrokernelTester().batch_size(n).prescale(scale).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
11246
// For each alpha setting (0.3 and 3), runs the wasmsimd_arm_rr2_p6_x12 ELU kernel
// on batch sizes 1, 12, 23, ..., 56 (step 11).
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X12, alpha) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x12);
  for (const float a : {0.3f, 3.0f}) {
    for (size_t n = 1; n <= 60; n += 11) {
      VUnOpMicrokernelTester().batch_size(n).alpha(a).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
11257
// For each beta setting (0.3 and 3), runs the wasmsimd_arm_rr2_p6_x12 ELU kernel
// on batch sizes 1, 12, 23, ..., 56 (step 11).
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X12, beta) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x12);
  for (const float b : {0.3f, 3.0f}) {
    for (size_t n = 1; n <= 60; n += 11) {
      VUnOpMicrokernelTester().batch_size(n).beta(b).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
11268 #endif // XNN_ARCH_WASMSIMD
11269
11270
11271 #if XNN_ARCH_WASMSIMD
// Runs the wasmsimd_arm_rr2_p6_x16 ELU kernel once on a batch of exactly 16 elements.
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X16, batch_eq_16) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x16);
  VUnOpMicrokernelTester().batch_size(16).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
}
11277
// Runs the wasmsimd_arm_rr2_p6_x16 ELU kernel on batch sizes 32, 48, ..., 144 (multiples of 16).
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X16, batch_div_16) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x16);
  for (size_t n = 32; n < 160; n += 16) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
11285
// Runs the wasmsimd_arm_rr2_p6_x16 ELU kernel on every batch size from 1 through 15.
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X16, batch_lt_16) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x16);
  for (size_t n = 1; n < 16; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
11293
// Runs the wasmsimd_arm_rr2_p6_x16 ELU kernel on every batch size from 17 through 31.
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X16, batch_gt_16) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x16);
  for (size_t n = 17; n < 32; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
11301
// Runs the wasmsimd_arm_rr2_p6_x16 ELU kernel with the tester's in-place mode enabled,
// for batch sizes 1, 16, 31, ..., 76 (step 15).
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X16, inplace) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x16);
  for (size_t n = 1; n <= 80; n += 15) {
    VUnOpMicrokernelTester().batch_size(n).inplace(true).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
11310
// For each prescale setting (0.1 and 10), runs the wasmsimd_arm_rr2_p6_x16 ELU kernel
// on batch sizes 1, 16, 31, ..., 76 (step 15).
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X16, prescale) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x16);
  for (const float scale : {0.1f, 10.0f}) {
    for (size_t n = 1; n <= 80; n += 15) {
      VUnOpMicrokernelTester().batch_size(n).prescale(scale).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
11321
// For each alpha setting (0.3 and 3), runs the wasmsimd_arm_rr2_p6_x16 ELU kernel
// on batch sizes 1, 16, 31, ..., 76 (step 15).
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X16, alpha) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x16);
  for (const float a : {0.3f, 3.0f}) {
    for (size_t n = 1; n <= 80; n += 15) {
      VUnOpMicrokernelTester().batch_size(n).alpha(a).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
11332
// For each beta setting (0.3 and 3), runs the wasmsimd_arm_rr2_p6_x16 ELU kernel
// on batch sizes 1, 16, 31, ..., 76 (step 15).
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X16, beta) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x16);
  for (const float b : {0.3f, 3.0f}) {
    for (size_t n = 1; n <= 80; n += 15) {
      VUnOpMicrokernelTester().batch_size(n).beta(b).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
11343 #endif // XNN_ARCH_WASMSIMD
11344
11345
11346 #if XNN_ARCH_WASMSIMD
// Runs the wasmsimd_arm_rr2_p6_x20 ELU kernel once on a batch of exactly 20 elements.
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X20, batch_eq_20) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x20);
  VUnOpMicrokernelTester().batch_size(20).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
}
11352
// Runs the wasmsimd_arm_rr2_p6_x20 ELU kernel on batch sizes 40, 60, ..., 180 (multiples of 20).
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X20, batch_div_20) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x20);
  for (size_t n = 40; n < 200; n += 20) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
11360
// Runs the wasmsimd_arm_rr2_p6_x20 ELU kernel on every batch size from 1 through 19.
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X20, batch_lt_20) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x20);
  for (size_t n = 1; n < 20; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
11368
// Runs the wasmsimd_arm_rr2_p6_x20 ELU kernel on every batch size from 21 through 39.
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X20, batch_gt_20) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x20);
  for (size_t n = 21; n < 40; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
11376
// Runs the wasmsimd_arm_rr2_p6_x20 ELU kernel with the tester's in-place mode enabled,
// for batch sizes 1, 20, 39, ..., 96 (step 19).
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X20, inplace) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x20);
  for (size_t n = 1; n <= 100; n += 19) {
    VUnOpMicrokernelTester().batch_size(n).inplace(true).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
11385
// For each prescale setting (0.1 and 10), runs the wasmsimd_arm_rr2_p6_x20 ELU kernel
// on batch sizes 1, 20, 39, ..., 96 (step 19).
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X20, prescale) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x20);
  for (const float scale : {0.1f, 10.0f}) {
    for (size_t n = 1; n <= 100; n += 19) {
      VUnOpMicrokernelTester().batch_size(n).prescale(scale).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
11396
// For each alpha setting (0.3 and 3), runs the wasmsimd_arm_rr2_p6_x20 ELU kernel
// on batch sizes 1, 20, 39, ..., 96 (step 19).
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X20, alpha) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x20);
  for (const float a : {0.3f, 3.0f}) {
    for (size_t n = 1; n <= 100; n += 19) {
      VUnOpMicrokernelTester().batch_size(n).alpha(a).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
11407
// For each beta setting (0.3 and 3), runs the wasmsimd_arm_rr2_p6_x20 ELU kernel
// on batch sizes 1, 20, 39, ..., 96 (step 19).
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X20, beta) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x20);
  for (const float b : {0.3f, 3.0f}) {
    for (size_t n = 1; n <= 100; n += 19) {
      VUnOpMicrokernelTester().batch_size(n).beta(b).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
11418 #endif // XNN_ARCH_WASMSIMD
11419
11420
11421 #if XNN_ARCH_WASMSIMD
// Runs the wasmsimd_arm_rr2_p6_x24 ELU kernel once on a batch of exactly 24 elements.
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X24, batch_eq_24) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x24);
  VUnOpMicrokernelTester().batch_size(24).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
}
11427
// Runs the wasmsimd_arm_rr2_p6_x24 ELU kernel on batch sizes 48, 72, ..., 216 (multiples of 24).
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X24, batch_div_24) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x24);
  for (size_t n = 48; n < 240; n += 24) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
11435
// Runs the wasmsimd_arm_rr2_p6_x24 ELU kernel on every batch size from 1 through 23.
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X24, batch_lt_24) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x24);
  for (size_t n = 1; n < 24; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
11443
// Runs the wasmsimd_arm_rr2_p6_x24 ELU kernel on every batch size from 25 through 47.
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X24, batch_gt_24) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x24);
  for (size_t n = 25; n < 48; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
11451
// Runs the wasmsimd_arm_rr2_p6_x24 ELU kernel with the tester's in-place mode enabled,
// for batch sizes 1, 24, 47, ..., 116 (step 23).
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X24, inplace) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x24);
  for (size_t n = 1; n <= 120; n += 23) {
    VUnOpMicrokernelTester().batch_size(n).inplace(true).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
11460
// For each prescale setting (0.1 and 10), runs the wasmsimd_arm_rr2_p6_x24 ELU kernel
// on batch sizes 1, 24, 47, ..., 116 (step 23).
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X24, prescale) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x24);
  for (const float scale : {0.1f, 10.0f}) {
    for (size_t n = 1; n <= 120; n += 23) {
      VUnOpMicrokernelTester().batch_size(n).prescale(scale).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
11471
// For each alpha setting (0.3 and 3), runs the wasmsimd_arm_rr2_p6_x24 ELU kernel
// on batch sizes 1, 24, 47, ..., 116 (step 23).
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X24, alpha) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x24);
  for (const float a : {0.3f, 3.0f}) {
    for (size_t n = 1; n <= 120; n += 23) {
      VUnOpMicrokernelTester().batch_size(n).alpha(a).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
11482
// For each beta setting (0.3 and 3), runs the wasmsimd_arm_rr2_p6_x24 ELU kernel
// on batch sizes 1, 24, 47, ..., 116 (step 23).
TEST(F32_VELU__WASMSIMD_ARM_RR2_P6_X24, beta) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_arm_rr2_p6_x24);
  for (const float b : {0.3f, 3.0f}) {
    for (size_t n = 1; n <= 120; n += 23) {
      VUnOpMicrokernelTester().batch_size(n).beta(b).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
11493 #endif // XNN_ARCH_WASMSIMD
11494
11495
11496 #if XNN_ARCH_WASMSIMD
// Runs the wasmsimd_x86_rr2_p6_x4 ELU kernel once on a batch of exactly 4 elements.
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X4, batch_eq_4) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x4);
  VUnOpMicrokernelTester().batch_size(4).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
}
11502
// Runs the wasmsimd_x86_rr2_p6_x4 ELU kernel on batch sizes 8, 12, ..., 36 (multiples of 4).
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X4, batch_div_4) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x4);
  for (size_t n = 8; n < 40; n += 4) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
11510
// Runs the wasmsimd_x86_rr2_p6_x4 ELU kernel on every batch size from 1 through 3.
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X4, batch_lt_4) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x4);
  for (size_t n = 1; n < 4; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
11518
// Runs the wasmsimd_x86_rr2_p6_x4 ELU kernel on every batch size from 5 through 7.
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X4, batch_gt_4) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x4);
  for (size_t n = 5; n < 8; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
11526
// Runs the wasmsimd_x86_rr2_p6_x4 ELU kernel with the tester's in-place mode enabled,
// for batch sizes 1, 4, 7, ..., 19 (step 3).
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X4, inplace) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x4);
  for (size_t n = 1; n <= 20; n += 3) {
    VUnOpMicrokernelTester().batch_size(n).inplace(true).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
11535
// For each prescale setting (0.1 and 10), runs the wasmsimd_x86_rr2_p6_x4 ELU kernel
// on batch sizes 1, 4, 7, ..., 19 (step 3).
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X4, prescale) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x4);
  for (const float scale : {0.1f, 10.0f}) {
    for (size_t n = 1; n <= 20; n += 3) {
      VUnOpMicrokernelTester().batch_size(n).prescale(scale).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
11546
// For each alpha setting (0.3 and 3), runs the wasmsimd_x86_rr2_p6_x4 ELU kernel
// on batch sizes 1, 4, 7, ..., 19 (step 3).
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X4, alpha) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x4);
  for (const float a : {0.3f, 3.0f}) {
    for (size_t n = 1; n <= 20; n += 3) {
      VUnOpMicrokernelTester().batch_size(n).alpha(a).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
11557
// For each beta setting (0.3 and 3), runs the wasmsimd_x86_rr2_p6_x4 ELU kernel
// on batch sizes 1, 4, 7, ..., 19 (step 3).
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X4, beta) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x4);
  for (const float b : {0.3f, 3.0f}) {
    for (size_t n = 1; n <= 20; n += 3) {
      VUnOpMicrokernelTester().batch_size(n).beta(b).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
11568 #endif // XNN_ARCH_WASMSIMD
11569
11570
11571 #if XNN_ARCH_WASMSIMD
// Runs the wasmsimd_x86_rr2_p6_x8 ELU kernel once on a batch of exactly 8 elements.
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X8, batch_eq_8) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x8);
  VUnOpMicrokernelTester().batch_size(8).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
}
11577
// Runs the wasmsimd_x86_rr2_p6_x8 ELU kernel on batch sizes 16, 24, ..., 72 (multiples of 8).
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X8, batch_div_8) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x8);
  for (size_t n = 16; n < 80; n += 8) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
11585
// Runs the wasmsimd_x86_rr2_p6_x8 ELU kernel on every batch size from 1 through 7.
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X8, batch_lt_8) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x8);
  for (size_t n = 1; n < 8; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
11593
// Runs the wasmsimd_x86_rr2_p6_x8 ELU kernel on every batch size from 9 through 15.
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X8, batch_gt_8) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x8);
  for (size_t n = 9; n < 16; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
11601
// Runs the wasmsimd_x86_rr2_p6_x8 ELU kernel with the tester's in-place mode enabled,
// for batch sizes 1, 8, 15, ..., 36 (step 7).
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X8, inplace) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x8);
  for (size_t n = 1; n <= 40; n += 7) {
    VUnOpMicrokernelTester().batch_size(n).inplace(true).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
11610
// For each prescale setting (0.1 and 10), runs the wasmsimd_x86_rr2_p6_x8 ELU kernel
// on batch sizes 1, 8, 15, ..., 36 (step 7).
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X8, prescale) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x8);
  for (const float scale : {0.1f, 10.0f}) {
    for (size_t n = 1; n <= 40; n += 7) {
      VUnOpMicrokernelTester().batch_size(n).prescale(scale).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
11621
// For each alpha setting (0.3 and 3), runs the wasmsimd_x86_rr2_p6_x8 ELU kernel
// on batch sizes 1, 8, 15, ..., 36 (step 7).
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X8, alpha) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x8);
  for (const float a : {0.3f, 3.0f}) {
    for (size_t n = 1; n <= 40; n += 7) {
      VUnOpMicrokernelTester().batch_size(n).alpha(a).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
11632
// For each beta setting (0.3 and 3), runs the wasmsimd_x86_rr2_p6_x8 ELU kernel
// on batch sizes 1, 8, 15, ..., 36 (step 7).
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X8, beta) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x8);
  for (const float b : {0.3f, 3.0f}) {
    for (size_t n = 1; n <= 40; n += 7) {
      VUnOpMicrokernelTester().batch_size(n).beta(b).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
11643 #endif // XNN_ARCH_WASMSIMD
11644
11645
11646 #if XNN_ARCH_WASMSIMD
// Runs the wasmsimd_x86_rr2_p6_x12 ELU kernel once on a batch of exactly 12 elements.
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X12, batch_eq_12) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x12);
  VUnOpMicrokernelTester().batch_size(12).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
}
11652
// Runs the wasmsimd_x86_rr2_p6_x12 ELU kernel on batch sizes 24, 36, ..., 108 (multiples of 12).
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X12, batch_div_12) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x12);
  for (size_t n = 24; n < 120; n += 12) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
11660
// Runs the wasmsimd_x86_rr2_p6_x12 ELU kernel on every batch size from 1 through 11.
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X12, batch_lt_12) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x12);
  for (size_t n = 1; n < 12; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
11668
// Runs the wasmsimd_x86_rr2_p6_x12 ELU kernel on every batch size from 13 through 23.
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X12, batch_gt_12) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x12);
  for (size_t n = 13; n < 24; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
11676
// Runs the wasmsimd_x86_rr2_p6_x12 ELU kernel with the tester's in-place mode enabled,
// for batch sizes 1, 12, 23, ..., 56 (step 11).
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X12, inplace) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x12);
  for (size_t n = 1; n <= 60; n += 11) {
    VUnOpMicrokernelTester().batch_size(n).inplace(true).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
11685
// For each prescale setting (0.1 and 10), runs the wasmsimd_x86_rr2_p6_x12 ELU kernel
// on batch sizes 1, 12, 23, ..., 56 (step 11).
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X12, prescale) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x12);
  for (const float scale : {0.1f, 10.0f}) {
    for (size_t n = 1; n <= 60; n += 11) {
      VUnOpMicrokernelTester().batch_size(n).prescale(scale).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
11696
// For each alpha setting (0.3 and 3), runs the wasmsimd_x86_rr2_p6_x12 ELU kernel
// on batch sizes 1, 12, 23, ..., 56 (step 11).
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X12, alpha) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x12);
  for (const float a : {0.3f, 3.0f}) {
    for (size_t n = 1; n <= 60; n += 11) {
      VUnOpMicrokernelTester().batch_size(n).alpha(a).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
11707
// For each beta setting (0.3 and 3), runs the wasmsimd_x86_rr2_p6_x12 ELU kernel
// on batch sizes 1, 12, 23, ..., 56 (step 11).
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X12, beta) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x12);
  for (const float b : {0.3f, 3.0f}) {
    for (size_t n = 1; n <= 60; n += 11) {
      VUnOpMicrokernelTester().batch_size(n).beta(b).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
11718 #endif // XNN_ARCH_WASMSIMD
11719
11720
11721 #if XNN_ARCH_WASMSIMD
// Runs the wasmsimd_x86_rr2_p6_x16 ELU kernel once on a batch of exactly 16 elements.
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X16, batch_eq_16) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x16);
  VUnOpMicrokernelTester().batch_size(16).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
}
11727
// Runs the wasmsimd_x86_rr2_p6_x16 ELU kernel on batch sizes 32, 48, ..., 144 (multiples of 16).
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X16, batch_div_16) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x16);
  for (size_t n = 32; n < 160; n += 16) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
11735
// Runs the wasmsimd_x86_rr2_p6_x16 ELU kernel on every batch size from 1 through 15.
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X16, batch_lt_16) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x16);
  for (size_t n = 1; n < 16; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
11743
// Runs the wasmsimd_x86_rr2_p6_x16 ELU kernel on every batch size from 17 through 31.
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X16, batch_gt_16) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x16);
  for (size_t n = 17; n < 32; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
11751
// Runs the wasmsimd_x86_rr2_p6_x16 ELU kernel with the tester's in-place mode enabled,
// for batch sizes 1, 16, 31, ..., 76 (step 15).
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X16, inplace) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x16);
  for (size_t n = 1; n <= 80; n += 15) {
    VUnOpMicrokernelTester().batch_size(n).inplace(true).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
11760
// For each prescale setting (0.1 and 10), runs the wasmsimd_x86_rr2_p6_x16 ELU kernel
// on batch sizes 1, 16, 31, ..., 76 (step 15).
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X16, prescale) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x16);
  for (const float scale : {0.1f, 10.0f}) {
    for (size_t n = 1; n <= 80; n += 15) {
      VUnOpMicrokernelTester().batch_size(n).prescale(scale).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
11771
// For each alpha setting (0.3 and 3), runs the wasmsimd_x86_rr2_p6_x16 ELU kernel
// on batch sizes 1, 16, 31, ..., 76 (step 15).
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X16, alpha) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x16);
  for (const float a : {0.3f, 3.0f}) {
    for (size_t n = 1; n <= 80; n += 15) {
      VUnOpMicrokernelTester().batch_size(n).alpha(a).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
11782
// For each beta setting (0.3 and 3), runs the wasmsimd_x86_rr2_p6_x16 ELU kernel
// on batch sizes 1, 16, 31, ..., 76 (step 15).
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X16, beta) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x16);
  for (const float b : {0.3f, 3.0f}) {
    for (size_t n = 1; n <= 80; n += 15) {
      VUnOpMicrokernelTester().batch_size(n).beta(b).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
11793 #endif // XNN_ARCH_WASMSIMD
11794
11795
11796 #if XNN_ARCH_WASMSIMD
// Runs the wasmsimd_x86_rr2_p6_x20 ELU kernel once on a batch of exactly 20 elements.
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X20, batch_eq_20) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x20);
  VUnOpMicrokernelTester().batch_size(20).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
}
11802
// Runs the wasmsimd_x86_rr2_p6_x20 ELU kernel on batch sizes 40, 60, ..., 180 (multiples of 20).
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X20, batch_div_20) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x20);
  for (size_t n = 40; n < 200; n += 20) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
11810
// Runs the wasmsimd_x86_rr2_p6_x20 ELU kernel on every batch size from 1 through 19.
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X20, batch_lt_20) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x20);
  for (size_t n = 1; n < 20; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
11818
// Runs the wasmsimd_x86_rr2_p6_x20 ELU kernel on every batch size from 21 through 39.
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X20, batch_gt_20) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x20);
  for (size_t n = 21; n < 40; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
11826
// Runs the wasmsimd_x86_rr2_p6_x20 ELU kernel with the tester's in-place mode enabled,
// for batch sizes 1, 20, 39, ..., 96 (step 19).
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X20, inplace) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x20);
  for (size_t n = 1; n <= 100; n += 19) {
    VUnOpMicrokernelTester().batch_size(n).inplace(true).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
11835
// For each prescale setting (0.1 and 10), runs the wasmsimd_x86_rr2_p6_x20 ELU kernel
// on batch sizes 1, 20, 39, ..., 96 (step 19).
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X20, prescale) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x20);
  for (const float scale : {0.1f, 10.0f}) {
    for (size_t n = 1; n <= 100; n += 19) {
      VUnOpMicrokernelTester().batch_size(n).prescale(scale).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
11846
// For each alpha setting (0.3 and 3), runs the wasmsimd_x86_rr2_p6_x20 ELU kernel
// on batch sizes 1, 20, 39, ..., 96 (step 19).
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X20, alpha) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x20);
  for (const float a : {0.3f, 3.0f}) {
    for (size_t n = 1; n <= 100; n += 19) {
      VUnOpMicrokernelTester().batch_size(n).alpha(a).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
11857
// For each beta setting (0.3 and 3), runs the wasmsimd_x86_rr2_p6_x20 ELU kernel
// on batch sizes 1, 20, 39, ..., 96 (step 19).
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X20, beta) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x20);
  for (const float b : {0.3f, 3.0f}) {
    for (size_t n = 1; n <= 100; n += 19) {
      VUnOpMicrokernelTester().batch_size(n).beta(b).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
11868 #endif // XNN_ARCH_WASMSIMD
11869
11870
11871 #if XNN_ARCH_WASMSIMD
// Runs the wasmsimd_x86_rr2_p6_x24 ELU kernel once on a batch of exactly 24 elements.
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X24, batch_eq_24) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x24);
  VUnOpMicrokernelTester().batch_size(24).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
}
11877
// Runs the wasmsimd_x86_rr2_p6_x24 ELU kernel on batch sizes 48, 72, ..., 216 (multiples of 24).
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X24, batch_div_24) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x24);
  for (size_t n = 48; n < 240; n += 24) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
11885
// Runs the wasmsimd_x86_rr2_p6_x24 ELU kernel on every batch size from 1 through 23.
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X24, batch_lt_24) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x24);
  for (size_t n = 1; n < 24; ++n) {
    VUnOpMicrokernelTester().batch_size(n).Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
11893
// Runs the ELU tester for every batch size from 25 through 47.
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X24, batch_gt_24) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x24);
  for (size_t n = 25; n < 48; ++n) {
    VUnOpMicrokernelTester()
      .batch_size(n)
      .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
11901
// Runs the ELU tester with its inplace option set to true, over batch sizes
// 1, 24, 47, 70, 93 and 116.
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X24, inplace) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x24);
  for (size_t n = 1; n <= 120; n += 23) {
    VUnOpMicrokernelTester()
      .batch_size(n)
      .inplace(true)
      .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
11910
// Runs the ELU tester with prescale set to 0.1 and 10.0, each over batch sizes
// 1, 24, 47, 70, 93 and 116.
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X24, prescale) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x24);
  const std::vector<float> prescales{0.1f, 10.0f};
  for (const float prescale : prescales) {
    for (size_t n = 1; n <= 120; n += 23) {
      VUnOpMicrokernelTester()
        .batch_size(n)
        .prescale(prescale)
        .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
11921
// Runs the ELU tester with alpha set to 0.3 and 3.0, each over batch sizes
// 1, 24, 47, 70, 93 and 116.
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X24, alpha) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x24);
  const std::vector<float> alphas{0.3f, 3.0f};
  for (const float alpha : alphas) {
    for (size_t n = 1; n <= 120; n += 23) {
      VUnOpMicrokernelTester()
        .batch_size(n)
        .alpha(alpha)
        .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
11932
// Runs the ELU tester with beta set to 0.3 and 3.0, each over batch sizes
// 1, 24, 47, 70, 93 and 116.
TEST(F32_VELU__WASMSIMD_X86_RR2_P6_X24, beta) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasmsimd_x86_rr2_p6_x24);
  const std::vector<float> betas{0.3f, 3.0f};
  for (const float beta : betas) {
    for (size_t n = 1; n <= 120; n += 23) {
      VUnOpMicrokernelTester()
        .batch_size(n)
        .beta(beta)
        .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
11943 #endif // XNN_ARCH_WASMSIMD
11944
11945
11946 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD
// Runs the ELU tester once with batch_size set to 1.
TEST(F32_VELU__WASM_RR2_LUT16_P3_X1, batch_eq_1) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x1);
  VUnOpMicrokernelTester()
    .batch_size(1)
    .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
}
11952
// Runs the ELU tester for every batch size from 2 through 9.
TEST(F32_VELU__WASM_RR2_LUT16_P3_X1, batch_gt_1) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x1);
  for (size_t n = 2; n < 10; ++n) {
    VUnOpMicrokernelTester()
      .batch_size(n)
      .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
11960
// Runs the ELU tester with its inplace option set to true, over batch sizes
// 1 through 5.
TEST(F32_VELU__WASM_RR2_LUT16_P3_X1, inplace) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x1);
  for (size_t n = 1; n <= 5; ++n) {
    VUnOpMicrokernelTester()
      .batch_size(n)
      .inplace(true)
      .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
11969
// Runs the ELU tester with prescale set to 0.1 and 10.0, each over batch sizes
// 1 through 5.
TEST(F32_VELU__WASM_RR2_LUT16_P3_X1, prescale) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x1);
  const std::vector<float> prescales{0.1f, 10.0f};
  for (const float prescale : prescales) {
    for (size_t n = 1; n <= 5; ++n) {
      VUnOpMicrokernelTester()
        .batch_size(n)
        .prescale(prescale)
        .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
11980
// Runs the ELU tester with alpha set to 0.3 and 3.0, each over batch sizes
// 1 through 5.
TEST(F32_VELU__WASM_RR2_LUT16_P3_X1, alpha) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x1);
  const std::vector<float> alphas{0.3f, 3.0f};
  for (const float alpha : alphas) {
    for (size_t n = 1; n <= 5; ++n) {
      VUnOpMicrokernelTester()
        .batch_size(n)
        .alpha(alpha)
        .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
11991
// Runs the ELU tester with beta set to 0.3 and 3.0, each over batch sizes
// 1 through 5.
TEST(F32_VELU__WASM_RR2_LUT16_P3_X1, beta) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x1);
  const std::vector<float> betas{0.3f, 3.0f};
  for (const float beta : betas) {
    for (size_t n = 1; n <= 5; ++n) {
      VUnOpMicrokernelTester()
        .batch_size(n)
        .beta(beta)
        .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
12002 #endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD
12003
12004
12005 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD
// Runs the ELU tester once with batch_size set to 2.
TEST(F32_VELU__WASM_RR2_LUT16_P3_X2, batch_eq_2) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x2);
  VUnOpMicrokernelTester()
    .batch_size(2)
    .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
}
12011
// Runs the ELU tester for batch sizes 4, 6, ..., 18 (step 2, below 20).
TEST(F32_VELU__WASM_RR2_LUT16_P3_X2, batch_div_2) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x2);
  for (size_t n = 4; n < 20; n += 2) {
    VUnOpMicrokernelTester()
      .batch_size(n)
      .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
12019
// Runs the ELU tester for batch sizes below 2; the loop bounds admit only 1.
TEST(F32_VELU__WASM_RR2_LUT16_P3_X2, batch_lt_2) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x2);
  for (size_t n = 1; n < 2; ++n) {
    VUnOpMicrokernelTester()
      .batch_size(n)
      .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
12027
// Runs the ELU tester for batch sizes in [3, 4); the loop bounds admit only 3.
TEST(F32_VELU__WASM_RR2_LUT16_P3_X2, batch_gt_2) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x2);
  for (size_t n = 3; n < 4; ++n) {
    VUnOpMicrokernelTester()
      .batch_size(n)
      .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
12035
// Runs the ELU tester with its inplace option set to true, over batch sizes
// 1 through 10.
TEST(F32_VELU__WASM_RR2_LUT16_P3_X2, inplace) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x2);
  for (size_t n = 1; n <= 10; ++n) {
    VUnOpMicrokernelTester()
      .batch_size(n)
      .inplace(true)
      .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
12044
// Runs the ELU tester with prescale set to 0.1 and 10.0, each over batch sizes
// 1 through 10.
TEST(F32_VELU__WASM_RR2_LUT16_P3_X2, prescale) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x2);
  const std::vector<float> prescales{0.1f, 10.0f};
  for (const float prescale : prescales) {
    for (size_t n = 1; n <= 10; ++n) {
      VUnOpMicrokernelTester()
        .batch_size(n)
        .prescale(prescale)
        .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
12055
// Runs the ELU tester with alpha set to 0.3 and 3.0, each over batch sizes
// 1 through 10.
TEST(F32_VELU__WASM_RR2_LUT16_P3_X2, alpha) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x2);
  const std::vector<float> alphas{0.3f, 3.0f};
  for (const float alpha : alphas) {
    for (size_t n = 1; n <= 10; ++n) {
      VUnOpMicrokernelTester()
        .batch_size(n)
        .alpha(alpha)
        .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
12066
// Runs the ELU tester with beta set to 0.3 and 3.0, each over batch sizes
// 1 through 10.
TEST(F32_VELU__WASM_RR2_LUT16_P3_X2, beta) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x2);
  const std::vector<float> betas{0.3f, 3.0f};
  for (const float beta : betas) {
    for (size_t n = 1; n <= 10; ++n) {
      VUnOpMicrokernelTester()
        .batch_size(n)
        .beta(beta)
        .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
12077 #endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD
12078
12079
12080 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD
// Runs the ELU tester once with batch_size set to 3.
TEST(F32_VELU__WASM_RR2_LUT16_P3_X3, batch_eq_3) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x3);
  VUnOpMicrokernelTester()
    .batch_size(3)
    .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
}
12086
// Runs the ELU tester for batch sizes 6, 9, ..., 27 (step 3, below 30).
TEST(F32_VELU__WASM_RR2_LUT16_P3_X3, batch_div_3) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x3);
  for (size_t n = 6; n < 30; n += 3) {
    VUnOpMicrokernelTester()
      .batch_size(n)
      .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
12094
// Runs the ELU tester for batch sizes 1 and 2.
TEST(F32_VELU__WASM_RR2_LUT16_P3_X3, batch_lt_3) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x3);
  for (size_t n = 1; n < 3; ++n) {
    VUnOpMicrokernelTester()
      .batch_size(n)
      .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
12102
// Runs the ELU tester for batch sizes 4 and 5.
TEST(F32_VELU__WASM_RR2_LUT16_P3_X3, batch_gt_3) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x3);
  for (size_t n = 4; n < 6; ++n) {
    VUnOpMicrokernelTester()
      .batch_size(n)
      .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
12110
// Runs the ELU tester with its inplace option set to true, over the odd batch
// sizes 1, 3, ..., 15.
TEST(F32_VELU__WASM_RR2_LUT16_P3_X3, inplace) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x3);
  for (size_t n = 1; n <= 15; n += 2) {
    VUnOpMicrokernelTester()
      .batch_size(n)
      .inplace(true)
      .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
12119
// Runs the ELU tester with prescale set to 0.1 and 10.0, each over the odd
// batch sizes 1, 3, ..., 15.
TEST(F32_VELU__WASM_RR2_LUT16_P3_X3, prescale) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x3);
  const std::vector<float> prescales{0.1f, 10.0f};
  for (const float prescale : prescales) {
    for (size_t n = 1; n <= 15; n += 2) {
      VUnOpMicrokernelTester()
        .batch_size(n)
        .prescale(prescale)
        .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
12130
// Runs the ELU tester with alpha set to 0.3 and 3.0, each over the odd batch
// sizes 1, 3, ..., 15.
TEST(F32_VELU__WASM_RR2_LUT16_P3_X3, alpha) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x3);
  const std::vector<float> alphas{0.3f, 3.0f};
  for (const float alpha : alphas) {
    for (size_t n = 1; n <= 15; n += 2) {
      VUnOpMicrokernelTester()
        .batch_size(n)
        .alpha(alpha)
        .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
12141
// Runs the ELU tester with beta set to 0.3 and 3.0, each over the odd batch
// sizes 1, 3, ..., 15.
TEST(F32_VELU__WASM_RR2_LUT16_P3_X3, beta) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x3);
  const std::vector<float> betas{0.3f, 3.0f};
  for (const float beta : betas) {
    for (size_t n = 1; n <= 15; n += 2) {
      VUnOpMicrokernelTester()
        .batch_size(n)
        .beta(beta)
        .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
12152 #endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD
12153
12154
12155 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD
// Runs the ELU tester once with batch_size set to 4.
TEST(F32_VELU__WASM_RR2_LUT16_P3_X4, batch_eq_4) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x4);
  VUnOpMicrokernelTester()
    .batch_size(4)
    .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
}
12161
// Runs the ELU tester for batch sizes 8, 12, ..., 36 (step 4, below 40).
TEST(F32_VELU__WASM_RR2_LUT16_P3_X4, batch_div_4) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x4);
  for (size_t n = 8; n < 40; n += 4) {
    VUnOpMicrokernelTester()
      .batch_size(n)
      .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
12169
// Runs the ELU tester for every batch size from 1 through 3.
TEST(F32_VELU__WASM_RR2_LUT16_P3_X4, batch_lt_4) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x4);
  for (size_t n = 1; n < 4; ++n) {
    VUnOpMicrokernelTester()
      .batch_size(n)
      .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
12177
// Runs the ELU tester for every batch size from 5 through 7.
TEST(F32_VELU__WASM_RR2_LUT16_P3_X4, batch_gt_4) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x4);
  for (size_t n = 5; n < 8; ++n) {
    VUnOpMicrokernelTester()
      .batch_size(n)
      .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
12185
// Runs the ELU tester with its inplace option set to true, over batch sizes
// 1, 4, 7, 10, 13, 16 and 19.
TEST(F32_VELU__WASM_RR2_LUT16_P3_X4, inplace) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x4);
  for (size_t n = 1; n <= 20; n += 3) {
    VUnOpMicrokernelTester()
      .batch_size(n)
      .inplace(true)
      .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
12194
// Runs the ELU tester with prescale set to 0.1 and 10.0, each over batch sizes
// 1, 4, 7, 10, 13, 16 and 19.
TEST(F32_VELU__WASM_RR2_LUT16_P3_X4, prescale) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x4);
  const std::vector<float> prescales{0.1f, 10.0f};
  for (const float prescale : prescales) {
    for (size_t n = 1; n <= 20; n += 3) {
      VUnOpMicrokernelTester()
        .batch_size(n)
        .prescale(prescale)
        .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
12205
// Runs the ELU tester with alpha set to 0.3 and 3.0, each over batch sizes
// 1, 4, 7, 10, 13, 16 and 19.
TEST(F32_VELU__WASM_RR2_LUT16_P3_X4, alpha) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x4);
  const std::vector<float> alphas{0.3f, 3.0f};
  for (const float alpha : alphas) {
    for (size_t n = 1; n <= 20; n += 3) {
      VUnOpMicrokernelTester()
        .batch_size(n)
        .alpha(alpha)
        .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
12216
// Runs the ELU tester with beta set to 0.3 and 3.0, each over batch sizes
// 1, 4, 7, 10, 13, 16 and 19.
TEST(F32_VELU__WASM_RR2_LUT16_P3_X4, beta) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x4);
  const std::vector<float> betas{0.3f, 3.0f};
  for (const float beta : betas) {
    for (size_t n = 1; n <= 20; n += 3) {
      VUnOpMicrokernelTester()
        .batch_size(n)
        .beta(beta)
        .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
12227 #endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD
12228
12229
12230 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD
// Runs the ELU tester once with batch_size set to 5.
TEST(F32_VELU__WASM_RR2_LUT16_P3_X5, batch_eq_5) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x5);
  VUnOpMicrokernelTester()
    .batch_size(5)
    .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
}
12236
// Runs the ELU tester for batch sizes 10, 15, ..., 45 (step 5, below 50).
TEST(F32_VELU__WASM_RR2_LUT16_P3_X5, batch_div_5) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x5);
  for (size_t n = 10; n < 50; n += 5) {
    VUnOpMicrokernelTester()
      .batch_size(n)
      .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
12244
// Runs the ELU tester for every batch size from 1 through 4.
TEST(F32_VELU__WASM_RR2_LUT16_P3_X5, batch_lt_5) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x5);
  for (size_t n = 1; n < 5; ++n) {
    VUnOpMicrokernelTester()
      .batch_size(n)
      .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
12252
// Runs the ELU tester for every batch size from 6 through 9.
TEST(F32_VELU__WASM_RR2_LUT16_P3_X5, batch_gt_5) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x5);
  for (size_t n = 6; n < 10; ++n) {
    VUnOpMicrokernelTester()
      .batch_size(n)
      .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
12260
// Runs the ELU tester with its inplace option set to true, over batch sizes
// 1, 5, 9, 13, 17, 21 and 25.
TEST(F32_VELU__WASM_RR2_LUT16_P3_X5, inplace) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x5);
  for (size_t n = 1; n <= 25; n += 4) {
    VUnOpMicrokernelTester()
      .batch_size(n)
      .inplace(true)
      .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
12269
// Runs the ELU tester with prescale set to 0.1 and 10.0, each over batch sizes
// 1, 5, 9, 13, 17, 21 and 25.
TEST(F32_VELU__WASM_RR2_LUT16_P3_X5, prescale) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x5);
  const std::vector<float> prescales{0.1f, 10.0f};
  for (const float prescale : prescales) {
    for (size_t n = 1; n <= 25; n += 4) {
      VUnOpMicrokernelTester()
        .batch_size(n)
        .prescale(prescale)
        .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
12280
// Runs the ELU tester with alpha set to 0.3 and 3.0, each over batch sizes
// 1, 5, 9, 13, 17, 21 and 25.
TEST(F32_VELU__WASM_RR2_LUT16_P3_X5, alpha) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x5);
  const std::vector<float> alphas{0.3f, 3.0f};
  for (const float alpha : alphas) {
    for (size_t n = 1; n <= 25; n += 4) {
      VUnOpMicrokernelTester()
        .batch_size(n)
        .alpha(alpha)
        .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
12291
// Runs the ELU tester with beta set to 0.3 and 3.0, each over batch sizes
// 1, 5, 9, 13, 17, 21 and 25.
TEST(F32_VELU__WASM_RR2_LUT16_P3_X5, beta) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x5);
  const std::vector<float> betas{0.3f, 3.0f};
  for (const float beta : betas) {
    for (size_t n = 1; n <= 25; n += 4) {
      VUnOpMicrokernelTester()
        .batch_size(n)
        .beta(beta)
        .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
12302 #endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD
12303
12304
12305 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD
// Runs the ELU tester once with batch_size set to 6.
TEST(F32_VELU__WASM_RR2_LUT16_P3_X6, batch_eq_6) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x6);
  VUnOpMicrokernelTester()
    .batch_size(6)
    .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
}
12311
// Runs the ELU tester for batch sizes 12, 18, ..., 54 (step 6, below 60).
TEST(F32_VELU__WASM_RR2_LUT16_P3_X6, batch_div_6) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x6);
  for (size_t n = 12; n < 60; n += 6) {
    VUnOpMicrokernelTester()
      .batch_size(n)
      .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
12319
// Runs the ELU tester for every batch size from 1 through 5.
TEST(F32_VELU__WASM_RR2_LUT16_P3_X6, batch_lt_6) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x6);
  for (size_t n = 1; n < 6; ++n) {
    VUnOpMicrokernelTester()
      .batch_size(n)
      .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
12327
// Runs the ELU tester for every batch size from 7 through 11.
TEST(F32_VELU__WASM_RR2_LUT16_P3_X6, batch_gt_6) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x6);
  for (size_t n = 7; n < 12; ++n) {
    VUnOpMicrokernelTester()
      .batch_size(n)
      .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
12335
// Runs the ELU tester with its inplace option set to true, over batch sizes
// 1, 6, 11, 16, 21 and 26.
TEST(F32_VELU__WASM_RR2_LUT16_P3_X6, inplace) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x6);
  for (size_t n = 1; n <= 30; n += 5) {
    VUnOpMicrokernelTester()
      .batch_size(n)
      .inplace(true)
      .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
12344
// Runs the ELU tester with prescale set to 0.1 and 10.0, each over batch sizes
// 1, 6, 11, 16, 21 and 26.
TEST(F32_VELU__WASM_RR2_LUT16_P3_X6, prescale) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x6);
  const std::vector<float> prescales{0.1f, 10.0f};
  for (const float prescale : prescales) {
    for (size_t n = 1; n <= 30; n += 5) {
      VUnOpMicrokernelTester()
        .batch_size(n)
        .prescale(prescale)
        .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
12355
// Runs the ELU tester with alpha set to 0.3 and 3.0, each over batch sizes
// 1, 6, 11, 16, 21 and 26.
TEST(F32_VELU__WASM_RR2_LUT16_P3_X6, alpha) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x6);
  const std::vector<float> alphas{0.3f, 3.0f};
  for (const float alpha : alphas) {
    for (size_t n = 1; n <= 30; n += 5) {
      VUnOpMicrokernelTester()
        .batch_size(n)
        .alpha(alpha)
        .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
12366
// Runs the ELU tester with beta set to 0.3 and 3.0, each over batch sizes
// 1, 6, 11, 16, 21 and 26.
TEST(F32_VELU__WASM_RR2_LUT16_P3_X6, beta) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x6);
  const std::vector<float> betas{0.3f, 3.0f};
  for (const float beta : betas) {
    for (size_t n = 1; n <= 30; n += 5) {
      VUnOpMicrokernelTester()
        .batch_size(n)
        .beta(beta)
        .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
12377 #endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD
12378
12379
12380 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD
// Runs the ELU tester once with batch_size set to 1.
TEST(F32_VELU__WASM_RR2_P6_X1, batch_eq_1) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasm_rr2_p6_x1);
  VUnOpMicrokernelTester()
    .batch_size(1)
    .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
}
12386
// Runs the ELU tester for every batch size from 2 through 9.
TEST(F32_VELU__WASM_RR2_P6_X1, batch_gt_1) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasm_rr2_p6_x1);
  for (size_t n = 2; n < 10; ++n) {
    VUnOpMicrokernelTester()
      .batch_size(n)
      .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
12394
// Runs the ELU tester with its inplace option set to true, over batch sizes
// 1 through 5.
TEST(F32_VELU__WASM_RR2_P6_X1, inplace) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasm_rr2_p6_x1);
  for (size_t n = 1; n <= 5; ++n) {
    VUnOpMicrokernelTester()
      .batch_size(n)
      .inplace(true)
      .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
12403
// Runs the ELU tester with prescale set to 0.1 and 10.0, each over batch sizes
// 1 through 5.
TEST(F32_VELU__WASM_RR2_P6_X1, prescale) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasm_rr2_p6_x1);
  const std::vector<float> prescales{0.1f, 10.0f};
  for (const float prescale : prescales) {
    for (size_t n = 1; n <= 5; ++n) {
      VUnOpMicrokernelTester()
        .batch_size(n)
        .prescale(prescale)
        .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
12414
// Runs the ELU tester with alpha set to 0.3 and 3.0, each over batch sizes
// 1 through 5.
TEST(F32_VELU__WASM_RR2_P6_X1, alpha) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasm_rr2_p6_x1);
  const std::vector<float> alphas{0.3f, 3.0f};
  for (const float alpha : alphas) {
    for (size_t n = 1; n <= 5; ++n) {
      VUnOpMicrokernelTester()
        .batch_size(n)
        .alpha(alpha)
        .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
12425
// Runs the ELU tester with beta set to 0.3 and 3.0, each over batch sizes
// 1 through 5.
TEST(F32_VELU__WASM_RR2_P6_X1, beta) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasm_rr2_p6_x1);
  const std::vector<float> betas{0.3f, 3.0f};
  for (const float beta : betas) {
    for (size_t n = 1; n <= 5; ++n) {
      VUnOpMicrokernelTester()
        .batch_size(n)
        .beta(beta)
        .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
12436 #endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD
12437
12438
12439 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD
// Runs the ELU tester once with batch_size set to 2.
TEST(F32_VELU__WASM_RR2_P6_X2, batch_eq_2) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasm_rr2_p6_x2);
  VUnOpMicrokernelTester()
    .batch_size(2)
    .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
}
12445
// Runs the ELU tester for batch sizes 4, 6, ..., 18 (step 2, below 20).
TEST(F32_VELU__WASM_RR2_P6_X2, batch_div_2) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasm_rr2_p6_x2);
  for (size_t n = 4; n < 20; n += 2) {
    VUnOpMicrokernelTester()
      .batch_size(n)
      .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
12453
// Runs the ELU tester for batch sizes below 2; the loop bounds admit only 1.
TEST(F32_VELU__WASM_RR2_P6_X2, batch_lt_2) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasm_rr2_p6_x2);
  for (size_t n = 1; n < 2; ++n) {
    VUnOpMicrokernelTester()
      .batch_size(n)
      .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
12461
// Runs the ELU tester for batch sizes in [3, 4); the loop bounds admit only 3.
TEST(F32_VELU__WASM_RR2_P6_X2, batch_gt_2) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasm_rr2_p6_x2);
  for (size_t n = 3; n < 4; ++n) {
    VUnOpMicrokernelTester()
      .batch_size(n)
      .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
12469
// Runs the ELU tester with its inplace option set to true, over batch sizes
// 1 through 10.
TEST(F32_VELU__WASM_RR2_P6_X2, inplace) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasm_rr2_p6_x2);
  for (size_t n = 1; n <= 10; ++n) {
    VUnOpMicrokernelTester()
      .batch_size(n)
      .inplace(true)
      .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
12478
// Runs the ELU tester with prescale set to 0.1 and 10.0, each over batch sizes
// 1 through 10.
TEST(F32_VELU__WASM_RR2_P6_X2, prescale) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasm_rr2_p6_x2);
  const std::vector<float> prescales{0.1f, 10.0f};
  for (const float prescale : prescales) {
    for (size_t n = 1; n <= 10; ++n) {
      VUnOpMicrokernelTester()
        .batch_size(n)
        .prescale(prescale)
        .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
12489
// Runs the ELU tester with alpha set to 0.3 and 3.0, each over batch sizes
// 1 through 10.
TEST(F32_VELU__WASM_RR2_P6_X2, alpha) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasm_rr2_p6_x2);
  const std::vector<float> alphas{0.3f, 3.0f};
  for (const float alpha : alphas) {
    for (size_t n = 1; n <= 10; ++n) {
      VUnOpMicrokernelTester()
        .batch_size(n)
        .alpha(alpha)
        .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
12500
// Runs the ELU tester with beta set to 0.3 and 3.0, each over batch sizes
// 1 through 10.
TEST(F32_VELU__WASM_RR2_P6_X2, beta) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasm_rr2_p6_x2);
  const std::vector<float> betas{0.3f, 3.0f};
  for (const float beta : betas) {
    for (size_t n = 1; n <= 10; ++n) {
      VUnOpMicrokernelTester()
        .batch_size(n)
        .beta(beta)
        .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
12511 #endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD
12512
12513
12514 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD
// Runs the ELU tester once with batch_size set to 3.
TEST(F32_VELU__WASM_RR2_P6_X3, batch_eq_3) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasm_rr2_p6_x3);
  VUnOpMicrokernelTester()
    .batch_size(3)
    .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
}
12520
// Runs the ELU tester for batch sizes 6, 9, ..., 27 (step 3, below 30).
TEST(F32_VELU__WASM_RR2_P6_X3, batch_div_3) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasm_rr2_p6_x3);
  for (size_t n = 6; n < 30; n += 3) {
    VUnOpMicrokernelTester()
      .batch_size(n)
      .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
12528
// Runs the ELU tester for batch sizes 1 and 2.
TEST(F32_VELU__WASM_RR2_P6_X3, batch_lt_3) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasm_rr2_p6_x3);
  for (size_t n = 1; n < 3; ++n) {
    VUnOpMicrokernelTester()
      .batch_size(n)
      .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
12536
// Runs the ELU tester for batch sizes 4 and 5.
TEST(F32_VELU__WASM_RR2_P6_X3, batch_gt_3) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasm_rr2_p6_x3);
  for (size_t n = 4; n < 6; ++n) {
    VUnOpMicrokernelTester()
      .batch_size(n)
      .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
12544
// Runs the ELU tester with its inplace option set to true, over the odd batch
// sizes 1, 3, ..., 15.
TEST(F32_VELU__WASM_RR2_P6_X3, inplace) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasm_rr2_p6_x3);
  for (size_t n = 1; n <= 15; n += 2) {
    VUnOpMicrokernelTester()
      .batch_size(n)
      .inplace(true)
      .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
12553
// Runs the ELU tester with prescale set to 0.1 and 10.0, each over the odd
// batch sizes 1, 3, ..., 15.
TEST(F32_VELU__WASM_RR2_P6_X3, prescale) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasm_rr2_p6_x3);
  const std::vector<float> prescales{0.1f, 10.0f};
  for (const float prescale : prescales) {
    for (size_t n = 1; n <= 15; n += 2) {
      VUnOpMicrokernelTester()
        .batch_size(n)
        .prescale(prescale)
        .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
12564
// Runs the ELU tester with alpha set to 0.3 and 3.0, each over the odd batch
// sizes 1, 3, ..., 15.
TEST(F32_VELU__WASM_RR2_P6_X3, alpha) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasm_rr2_p6_x3);
  const std::vector<float> alphas{0.3f, 3.0f};
  for (const float alpha : alphas) {
    for (size_t n = 1; n <= 15; n += 2) {
      VUnOpMicrokernelTester()
        .batch_size(n)
        .alpha(alpha)
        .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
12575
// Runs the ELU tester with beta set to 0.3 and 3.0, each over the odd batch
// sizes 1, 3, ..., 15.
TEST(F32_VELU__WASM_RR2_P6_X3, beta) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasm_rr2_p6_x3);
  const std::vector<float> betas{0.3f, 3.0f};
  for (const float beta : betas) {
    for (size_t n = 1; n <= 15; n += 2) {
      VUnOpMicrokernelTester()
        .batch_size(n)
        .beta(beta)
        .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
12586 #endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD
12587
12588
12589 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD
// Runs the ELU tester once with batch_size set to 4.
TEST(F32_VELU__WASM_RR2_P6_X4, batch_eq_4) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasm_rr2_p6_x4);
  VUnOpMicrokernelTester()
    .batch_size(4)
    .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
}
12595
// Runs the ELU tester for batch sizes 8, 12, ..., 36 (step 4, below 40).
TEST(F32_VELU__WASM_RR2_P6_X4, batch_div_4) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasm_rr2_p6_x4);
  for (size_t n = 8; n < 40; n += 4) {
    VUnOpMicrokernelTester()
      .batch_size(n)
      .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
12603
// Runs the ELU tester for every batch size from 1 through 3.
TEST(F32_VELU__WASM_RR2_P6_X4, batch_lt_4) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasm_rr2_p6_x4);
  for (size_t n = 1; n < 4; ++n) {
    VUnOpMicrokernelTester()
      .batch_size(n)
      .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
12611
// Runs the ELU tester for every batch size from 5 through 7.
TEST(F32_VELU__WASM_RR2_P6_X4, batch_gt_4) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasm_rr2_p6_x4);
  for (size_t n = 5; n < 8; ++n) {
    VUnOpMicrokernelTester()
      .batch_size(n)
      .Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
12619
// Drives the ELU tester with its inplace flag set to true, over batch sizes
// from 1 through 20 in steps of 3.
TEST(F32_VELU__WASM_RR2_P6_X4, inplace) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasm_rr2_p6_x4);
  for (size_t n = 1; n <= 20; n += 3) {
    VUnOpMicrokernelTester tester;
    tester.batch_size(n).inplace(true);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
12628
// Drives the ELU tester for each prescale in {0.1, 10.0}, over batch sizes
// from 1 through 20 in steps of 3.
TEST(F32_VELU__WASM_RR2_P6_X4, prescale) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasm_rr2_p6_x4);
  const std::vector<float> prescale_values = {0.1f, 10.0f};
  for (const float value : prescale_values) {
    for (size_t n = 1; n <= 20; n += 3) {
      VUnOpMicrokernelTester tester;
      tester.batch_size(n).prescale(value);
      tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
12639
// Drives the ELU tester for each alpha in {0.3, 3.0}, over batch sizes from 1
// through 20 in steps of 3.
TEST(F32_VELU__WASM_RR2_P6_X4, alpha) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasm_rr2_p6_x4);
  const std::vector<float> alpha_values = {0.3f, 3.0f};
  for (const float value : alpha_values) {
    for (size_t n = 1; n <= 20; n += 3) {
      VUnOpMicrokernelTester tester;
      tester.batch_size(n).alpha(value);
      tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
12650
// Drives the ELU tester for each beta in {0.3, 3.0}, over batch sizes from 1
// through 20 in steps of 3.
TEST(F32_VELU__WASM_RR2_P6_X4, beta) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasm_rr2_p6_x4);
  const std::vector<float> beta_values = {0.3f, 3.0f};
  for (const float value : beta_values) {
    for (size_t n = 1; n <= 20; n += 3) {
      VUnOpMicrokernelTester tester;
      tester.batch_size(n).beta(value);
      tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
12661 #endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD
12662
12663
12664 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD
// Drives the ELU tester once with a batch of exactly 5 elements.
TEST(F32_VELU__WASM_RR2_P6_X5, batch_eq_5) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasm_rr2_p6_x5);
  VUnOpMicrokernelTester tester;
  tester.batch_size(5);
  tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
}
12670
// Drives the ELU tester over batch sizes from 10 up to (but excluding) 50 in
// steps of 5.
TEST(F32_VELU__WASM_RR2_P6_X5, batch_div_5) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasm_rr2_p6_x5);
  for (size_t n = 10; n < 50; n += 5) {
    VUnOpMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
12678
// Drives the ELU tester over every batch size from 1 up to (but excluding) 5.
TEST(F32_VELU__WASM_RR2_P6_X5, batch_lt_5) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasm_rr2_p6_x5);
  for (size_t n = 1; n < 5; ++n) {
    VUnOpMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
12686
// Drives the ELU tester over every batch size from 6 up to (but excluding) 10.
TEST(F32_VELU__WASM_RR2_P6_X5, batch_gt_5) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasm_rr2_p6_x5);
  for (size_t n = 6; n < 10; ++n) {
    VUnOpMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
12694
// Drives the ELU tester with its inplace flag set to true, over batch sizes
// from 1 through 25 in steps of 4.
TEST(F32_VELU__WASM_RR2_P6_X5, inplace) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasm_rr2_p6_x5);
  for (size_t n = 1; n <= 25; n += 4) {
    VUnOpMicrokernelTester tester;
    tester.batch_size(n).inplace(true);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
12703
// Drives the ELU tester for each prescale in {0.1, 10.0}, over batch sizes
// from 1 through 25 in steps of 4.
TEST(F32_VELU__WASM_RR2_P6_X5, prescale) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasm_rr2_p6_x5);
  const std::vector<float> prescale_values = {0.1f, 10.0f};
  for (const float value : prescale_values) {
    for (size_t n = 1; n <= 25; n += 4) {
      VUnOpMicrokernelTester tester;
      tester.batch_size(n).prescale(value);
      tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
12714
// Drives the ELU tester for each alpha in {0.3, 3.0}, over batch sizes from 1
// through 25 in steps of 4.
TEST(F32_VELU__WASM_RR2_P6_X5, alpha) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasm_rr2_p6_x5);
  const std::vector<float> alpha_values = {0.3f, 3.0f};
  for (const float value : alpha_values) {
    for (size_t n = 1; n <= 25; n += 4) {
      VUnOpMicrokernelTester tester;
      tester.batch_size(n).alpha(value);
      tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
12725
// Drives the ELU tester for each beta in {0.3, 3.0}, over batch sizes from 1
// through 25 in steps of 4.
TEST(F32_VELU__WASM_RR2_P6_X5, beta) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasm_rr2_p6_x5);
  const std::vector<float> beta_values = {0.3f, 3.0f};
  for (const float value : beta_values) {
    for (size_t n = 1; n <= 25; n += 4) {
      VUnOpMicrokernelTester tester;
      tester.batch_size(n).beta(value);
      tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
12736 #endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD
12737
12738
12739 #if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD
// Drives the ELU tester once with a batch of exactly 6 elements.
TEST(F32_VELU__WASM_RR2_P6_X6, batch_eq_6) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasm_rr2_p6_x6);
  VUnOpMicrokernelTester tester;
  tester.batch_size(6);
  tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
}
12745
// Drives the ELU tester over batch sizes from 12 up to (but excluding) 60 in
// steps of 6.
TEST(F32_VELU__WASM_RR2_P6_X6, batch_div_6) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasm_rr2_p6_x6);
  for (size_t n = 12; n < 60; n += 6) {
    VUnOpMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
12753
// Drives the ELU tester over every batch size from 1 up to (but excluding) 6.
TEST(F32_VELU__WASM_RR2_P6_X6, batch_lt_6) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasm_rr2_p6_x6);
  for (size_t n = 1; n < 6; ++n) {
    VUnOpMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
12761
// Drives the ELU tester over every batch size from 7 up to (but excluding) 12.
TEST(F32_VELU__WASM_RR2_P6_X6, batch_gt_6) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasm_rr2_p6_x6);
  for (size_t n = 7; n < 12; ++n) {
    VUnOpMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
12769
// Drives the ELU tester with its inplace flag set to true, over batch sizes
// from 1 through 30 in steps of 5.
TEST(F32_VELU__WASM_RR2_P6_X6, inplace) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasm_rr2_p6_x6);
  for (size_t n = 1; n <= 30; n += 5) {
    VUnOpMicrokernelTester tester;
    tester.batch_size(n).inplace(true);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
  }
}
12778
// Drives the ELU tester for each prescale in {0.1, 10.0}, over batch sizes
// from 1 through 30 in steps of 5.
TEST(F32_VELU__WASM_RR2_P6_X6, prescale) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasm_rr2_p6_x6);
  const std::vector<float> prescale_values = {0.1f, 10.0f};
  for (const float value : prescale_values) {
    for (size_t n = 1; n <= 30; n += 5) {
      VUnOpMicrokernelTester tester;
      tester.batch_size(n).prescale(value);
      tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
12789
// Drives the ELU tester for each alpha in {0.3, 3.0}, over batch sizes from 1
// through 30 in steps of 5.
TEST(F32_VELU__WASM_RR2_P6_X6, alpha) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasm_rr2_p6_x6);
  const std::vector<float> alpha_values = {0.3f, 3.0f};
  for (const float value : alpha_values) {
    for (size_t n = 1; n <= 30; n += 5) {
      VUnOpMicrokernelTester tester;
      tester.batch_size(n).alpha(value);
      tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
12800
// Drives the ELU tester for each beta in {0.3, 3.0}, over batch sizes from 1
// through 30 in steps of 5.
TEST(F32_VELU__WASM_RR2_P6_X6, beta) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__wasm_rr2_p6_x6);
  const std::vector<float> beta_values = {0.3f, 3.0f};
  for (const float value : beta_values) {
    for (size_t n = 1; n <= 30; n += 5) {
      VUnOpMicrokernelTester tester;
      tester.batch_size(n).beta(value);
      tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU);
    }
  }
}
12811 #endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD
12812
12813
// Drives the ELU tester (Scalar variant) once with a batch of exactly 1 element.
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X1, batch_eq_1) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x1);
  VUnOpMicrokernelTester tester;
  tester.batch_size(1);
  tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU, VUnOpMicrokernelTester::Variant::Scalar);
}
12819
// Drives the ELU tester (Scalar variant) over every batch size from 2 up to
// (but excluding) 10.
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X1, batch_gt_1) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x1);
  for (size_t n = 2; n < 10; ++n) {
    VUnOpMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU, VUnOpMicrokernelTester::Variant::Scalar);
  }
}
12827
// Drives the ELU tester (Scalar variant) with its inplace flag set to true,
// over every batch size from 1 through 5.
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X1, inplace) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x1);
  for (size_t n = 1; n <= 5; ++n) {
    VUnOpMicrokernelTester tester;
    tester.batch_size(n).inplace(true);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU, VUnOpMicrokernelTester::Variant::Scalar);
  }
}
12836
// Drives the ELU tester (Scalar variant) for each prescale in {0.1, 10.0},
// over every batch size from 1 through 5.
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X1, prescale) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x1);
  const std::vector<float> prescale_values = {0.1f, 10.0f};
  for (const float value : prescale_values) {
    for (size_t n = 1; n <= 5; ++n) {
      VUnOpMicrokernelTester tester;
      tester.batch_size(n).prescale(value);
      tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU, VUnOpMicrokernelTester::Variant::Scalar);
    }
  }
}
12847
// Drives the ELU tester (Scalar variant) for each alpha in {0.3, 3.0}, over
// every batch size from 1 through 5.
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X1, alpha) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x1);
  const std::vector<float> alpha_values = {0.3f, 3.0f};
  for (const float value : alpha_values) {
    for (size_t n = 1; n <= 5; ++n) {
      VUnOpMicrokernelTester tester;
      tester.batch_size(n).alpha(value);
      tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU, VUnOpMicrokernelTester::Variant::Scalar);
    }
  }
}
12858
// Drives the ELU tester (Scalar variant) for each beta in {0.3, 3.0}, over
// every batch size from 1 through 5.
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X1, beta) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x1);
  const std::vector<float> beta_values = {0.3f, 3.0f};
  for (const float value : beta_values) {
    for (size_t n = 1; n <= 5; ++n) {
      VUnOpMicrokernelTester tester;
      tester.batch_size(n).beta(value);
      tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU, VUnOpMicrokernelTester::Variant::Scalar);
    }
  }
}
12869
// Drives the ELU tester (Scalar variant) once with a batch of exactly 2 elements.
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X2, batch_eq_2) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x2);
  VUnOpMicrokernelTester tester;
  tester.batch_size(2);
  tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU, VUnOpMicrokernelTester::Variant::Scalar);
}
12875
// Drives the ELU tester (Scalar variant) over batch sizes from 4 up to (but
// excluding) 20 in steps of 2.
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X2, batch_div_2) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x2);
  for (size_t n = 4; n < 20; n += 2) {
    VUnOpMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU, VUnOpMicrokernelTester::Variant::Scalar);
  }
}
12883
// Drives the ELU tester (Scalar variant) over every batch size from 1 up to
// (but excluding) 2, i.e. only batch size 1.
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X2, batch_lt_2) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x2);
  for (size_t n = 1; n < 2; ++n) {
    VUnOpMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU, VUnOpMicrokernelTester::Variant::Scalar);
  }
}
12891
// Drives the ELU tester (Scalar variant) over every batch size from 3 up to
// (but excluding) 4, i.e. only batch size 3.
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X2, batch_gt_2) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x2);
  for (size_t n = 3; n < 4; ++n) {
    VUnOpMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU, VUnOpMicrokernelTester::Variant::Scalar);
  }
}
12899
// Drives the ELU tester (Scalar variant) with its inplace flag set to true,
// over every batch size from 1 through 10.
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X2, inplace) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x2);
  for (size_t n = 1; n <= 10; ++n) {
    VUnOpMicrokernelTester tester;
    tester.batch_size(n).inplace(true);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU, VUnOpMicrokernelTester::Variant::Scalar);
  }
}
12908
// Drives the ELU tester (Scalar variant) for each prescale in {0.1, 10.0},
// over every batch size from 1 through 10.
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X2, prescale) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x2);
  const std::vector<float> prescale_values = {0.1f, 10.0f};
  for (const float value : prescale_values) {
    for (size_t n = 1; n <= 10; ++n) {
      VUnOpMicrokernelTester tester;
      tester.batch_size(n).prescale(value);
      tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU, VUnOpMicrokernelTester::Variant::Scalar);
    }
  }
}
12919
// Drives the ELU tester (Scalar variant) for each alpha in {0.3, 3.0}, over
// every batch size from 1 through 10.
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X2, alpha) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x2);
  const std::vector<float> alpha_values = {0.3f, 3.0f};
  for (const float value : alpha_values) {
    for (size_t n = 1; n <= 10; ++n) {
      VUnOpMicrokernelTester tester;
      tester.batch_size(n).alpha(value);
      tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU, VUnOpMicrokernelTester::Variant::Scalar);
    }
  }
}
12930
// Drives the ELU tester (Scalar variant) for each beta in {0.3, 3.0}, over
// every batch size from 1 through 10.
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X2, beta) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x2);
  const std::vector<float> beta_values = {0.3f, 3.0f};
  for (const float value : beta_values) {
    for (size_t n = 1; n <= 10; ++n) {
      VUnOpMicrokernelTester tester;
      tester.batch_size(n).beta(value);
      tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU, VUnOpMicrokernelTester::Variant::Scalar);
    }
  }
}
12941
// Drives the ELU tester (Scalar variant) once with a batch of exactly 3 elements.
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X3, batch_eq_3) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x3);
  VUnOpMicrokernelTester tester;
  tester.batch_size(3);
  tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU, VUnOpMicrokernelTester::Variant::Scalar);
}
12947
// Drives the ELU tester (Scalar variant) over batch sizes from 6 up to (but
// excluding) 30 in steps of 3.
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X3, batch_div_3) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x3);
  for (size_t n = 6; n < 30; n += 3) {
    VUnOpMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU, VUnOpMicrokernelTester::Variant::Scalar);
  }
}
12955
// Drives the ELU tester (Scalar variant) over every batch size from 1 up to
// (but excluding) 3.
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X3, batch_lt_3) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x3);
  for (size_t n = 1; n < 3; ++n) {
    VUnOpMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU, VUnOpMicrokernelTester::Variant::Scalar);
  }
}
12963
// Drives the ELU tester (Scalar variant) over every batch size from 4 up to
// (but excluding) 6.
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X3, batch_gt_3) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x3);
  for (size_t n = 4; n < 6; ++n) {
    VUnOpMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU, VUnOpMicrokernelTester::Variant::Scalar);
  }
}
12971
// Drives the ELU tester (Scalar variant) with its inplace flag set to true,
// over batch sizes from 1 through 15 in steps of 2.
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X3, inplace) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x3);
  for (size_t n = 1; n <= 15; n += 2) {
    VUnOpMicrokernelTester tester;
    tester.batch_size(n).inplace(true);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU, VUnOpMicrokernelTester::Variant::Scalar);
  }
}
12980
// Drives the ELU tester (Scalar variant) for each prescale in {0.1, 10.0},
// over batch sizes from 1 through 15 in steps of 2.
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X3, prescale) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x3);
  const std::vector<float> prescale_values = {0.1f, 10.0f};
  for (const float value : prescale_values) {
    for (size_t n = 1; n <= 15; n += 2) {
      VUnOpMicrokernelTester tester;
      tester.batch_size(n).prescale(value);
      tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU, VUnOpMicrokernelTester::Variant::Scalar);
    }
  }
}
12991
// Drives the ELU tester (Scalar variant) for each alpha in {0.3, 3.0}, over
// batch sizes from 1 through 15 in steps of 2.
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X3, alpha) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x3);
  const std::vector<float> alpha_values = {0.3f, 3.0f};
  for (const float value : alpha_values) {
    for (size_t n = 1; n <= 15; n += 2) {
      VUnOpMicrokernelTester tester;
      tester.batch_size(n).alpha(value);
      tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU, VUnOpMicrokernelTester::Variant::Scalar);
    }
  }
}
13002
// Drives the ELU tester (Scalar variant) for each beta in {0.3, 3.0}, over
// batch sizes from 1 through 15 in steps of 2.
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X3, beta) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x3);
  const std::vector<float> beta_values = {0.3f, 3.0f};
  for (const float value : beta_values) {
    for (size_t n = 1; n <= 15; n += 2) {
      VUnOpMicrokernelTester tester;
      tester.batch_size(n).beta(value);
      tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU, VUnOpMicrokernelTester::Variant::Scalar);
    }
  }
}
13013
// Drives the ELU tester (Scalar variant) once with a batch of exactly 4 elements.
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X4, batch_eq_4) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x4);
  VUnOpMicrokernelTester tester;
  tester.batch_size(4);
  tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU, VUnOpMicrokernelTester::Variant::Scalar);
}
13019
// Drives the ELU tester (Scalar variant) over batch sizes from 8 up to (but
// excluding) 40 in steps of 4.
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X4, batch_div_4) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x4);
  for (size_t n = 8; n < 40; n += 4) {
    VUnOpMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU, VUnOpMicrokernelTester::Variant::Scalar);
  }
}
13027
// Drives the ELU tester (Scalar variant) over every batch size from 1 up to
// (but excluding) 4.
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X4, batch_lt_4) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x4);
  for (size_t n = 1; n < 4; ++n) {
    VUnOpMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU, VUnOpMicrokernelTester::Variant::Scalar);
  }
}
13035
// Drives the ELU tester (Scalar variant) over every batch size from 5 up to
// (but excluding) 8.
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X4, batch_gt_4) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x4);
  for (size_t n = 5; n < 8; ++n) {
    VUnOpMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU, VUnOpMicrokernelTester::Variant::Scalar);
  }
}
13043
// Drives the ELU tester (Scalar variant) with its inplace flag set to true,
// over batch sizes from 1 through 20 in steps of 3.
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X4, inplace) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x4);
  for (size_t n = 1; n <= 20; n += 3) {
    VUnOpMicrokernelTester tester;
    tester.batch_size(n).inplace(true);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU, VUnOpMicrokernelTester::Variant::Scalar);
  }
}
13052
// Drives the ELU tester (Scalar variant) for each prescale in {0.1, 10.0},
// over batch sizes from 1 through 20 in steps of 3.
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X4, prescale) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x4);
  const std::vector<float> prescale_values = {0.1f, 10.0f};
  for (const float value : prescale_values) {
    for (size_t n = 1; n <= 20; n += 3) {
      VUnOpMicrokernelTester tester;
      tester.batch_size(n).prescale(value);
      tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU, VUnOpMicrokernelTester::Variant::Scalar);
    }
  }
}
13063
// Drives the ELU tester (Scalar variant) for each alpha in {0.3, 3.0}, over
// batch sizes from 1 through 20 in steps of 3.
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X4, alpha) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x4);
  const std::vector<float> alpha_values = {0.3f, 3.0f};
  for (const float value : alpha_values) {
    for (size_t n = 1; n <= 20; n += 3) {
      VUnOpMicrokernelTester tester;
      tester.batch_size(n).alpha(value);
      tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU, VUnOpMicrokernelTester::Variant::Scalar);
    }
  }
}
13074
// Drives the ELU tester (Scalar variant) for each beta in {0.3, 3.0}, over
// batch sizes from 1 through 20 in steps of 3.
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X4, beta) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x4);
  const std::vector<float> beta_values = {0.3f, 3.0f};
  for (const float value : beta_values) {
    for (size_t n = 1; n <= 20; n += 3) {
      VUnOpMicrokernelTester tester;
      tester.batch_size(n).beta(value);
      tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU, VUnOpMicrokernelTester::Variant::Scalar);
    }
  }
}
13085
// Drives the ELU tester (Scalar variant) once with a batch of exactly 5 elements.
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X5, batch_eq_5) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x5);
  VUnOpMicrokernelTester tester;
  tester.batch_size(5);
  tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU, VUnOpMicrokernelTester::Variant::Scalar);
}
13091
// Drives the ELU tester (Scalar variant) over batch sizes from 10 up to (but
// excluding) 50 in steps of 5.
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X5, batch_div_5) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x5);
  for (size_t n = 10; n < 50; n += 5) {
    VUnOpMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU, VUnOpMicrokernelTester::Variant::Scalar);
  }
}
13099
// Drives the ELU tester (Scalar variant) over every batch size from 1 up to
// (but excluding) 5.
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X5, batch_lt_5) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x5);
  for (size_t n = 1; n < 5; ++n) {
    VUnOpMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU, VUnOpMicrokernelTester::Variant::Scalar);
  }
}
13107
// Drives the ELU tester (Scalar variant) over every batch size from 6 up to
// (but excluding) 10.
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X5, batch_gt_5) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x5);
  for (size_t n = 6; n < 10; ++n) {
    VUnOpMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU, VUnOpMicrokernelTester::Variant::Scalar);
  }
}
13115
// Drives the ELU tester (Scalar variant) with its inplace flag set to true,
// over batch sizes from 1 through 25 in steps of 4.
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X5, inplace) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x5);
  for (size_t n = 1; n <= 25; n += 4) {
    VUnOpMicrokernelTester tester;
    tester.batch_size(n).inplace(true);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU, VUnOpMicrokernelTester::Variant::Scalar);
  }
}
13124
// Drives the ELU tester (Scalar variant) for each prescale in {0.1, 10.0},
// over batch sizes from 1 through 25 in steps of 4.
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X5, prescale) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x5);
  const std::vector<float> prescale_values = {0.1f, 10.0f};
  for (const float value : prescale_values) {
    for (size_t n = 1; n <= 25; n += 4) {
      VUnOpMicrokernelTester tester;
      tester.batch_size(n).prescale(value);
      tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU, VUnOpMicrokernelTester::Variant::Scalar);
    }
  }
}
13135
// Drives the ELU tester (Scalar variant) for each alpha in {0.3, 3.0}, over
// batch sizes from 1 through 25 in steps of 4.
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X5, alpha) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x5);
  const std::vector<float> alpha_values = {0.3f, 3.0f};
  for (const float value : alpha_values) {
    for (size_t n = 1; n <= 25; n += 4) {
      VUnOpMicrokernelTester tester;
      tester.batch_size(n).alpha(value);
      tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU, VUnOpMicrokernelTester::Variant::Scalar);
    }
  }
}
13146
// Drives the ELU tester (Scalar variant) for each beta in {0.3, 3.0}, over
// batch sizes from 1 through 25 in steps of 4.
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X5, beta) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x5);
  const std::vector<float> beta_values = {0.3f, 3.0f};
  for (const float value : beta_values) {
    for (size_t n = 1; n <= 25; n += 4) {
      VUnOpMicrokernelTester tester;
      tester.batch_size(n).beta(value);
      tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU, VUnOpMicrokernelTester::Variant::Scalar);
    }
  }
}
13157
// Drives the ELU tester (Scalar variant) once with a batch of exactly 6 elements.
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X6, batch_eq_6) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x6);
  VUnOpMicrokernelTester tester;
  tester.batch_size(6);
  tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU, VUnOpMicrokernelTester::Variant::Scalar);
}
13163
// Drives the ELU tester (Scalar variant) over batch sizes from 12 up to (but
// excluding) 60 in steps of 6.
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X6, batch_div_6) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x6);
  for (size_t n = 12; n < 60; n += 6) {
    VUnOpMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU, VUnOpMicrokernelTester::Variant::Scalar);
  }
}
13171
// Drives the ELU tester (Scalar variant) over every batch size from 1 up to
// (but excluding) 6.
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X6, batch_lt_6) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x6);
  for (size_t n = 1; n < 6; ++n) {
    VUnOpMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU, VUnOpMicrokernelTester::Variant::Scalar);
  }
}
13179
// Drives the ELU tester (Scalar variant) over every batch size from 7 up to
// (but excluding) 12.
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X6, batch_gt_6) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x6);
  for (size_t n = 7; n < 12; ++n) {
    VUnOpMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU, VUnOpMicrokernelTester::Variant::Scalar);
  }
}
13187
// Drives the ELU tester (Scalar variant) with its inplace flag set to true,
// over batch sizes from 1 through 30 in steps of 5.
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X6, inplace) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x6);
  for (size_t n = 1; n <= 30; n += 5) {
    VUnOpMicrokernelTester tester;
    tester.batch_size(n).inplace(true);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU, VUnOpMicrokernelTester::Variant::Scalar);
  }
}
13196
// Drives the ELU tester (Scalar variant) for each prescale in {0.1, 10.0},
// over batch sizes from 1 through 30 in steps of 5.
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X6, prescale) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x6);
  const std::vector<float> prescale_values = {0.1f, 10.0f};
  for (const float value : prescale_values) {
    for (size_t n = 1; n <= 30; n += 5) {
      VUnOpMicrokernelTester tester;
      tester.batch_size(n).prescale(value);
      tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU, VUnOpMicrokernelTester::Variant::Scalar);
    }
  }
}
13207
// Drives the ELU tester (Scalar variant) for each alpha in {0.3, 3.0}, over
// batch sizes from 1 through 30 in steps of 5.
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X6, alpha) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x6);
  const std::vector<float> alpha_values = {0.3f, 3.0f};
  for (const float value : alpha_values) {
    for (size_t n = 1; n <= 30; n += 5) {
      VUnOpMicrokernelTester tester;
      tester.batch_size(n).alpha(value);
      tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU, VUnOpMicrokernelTester::Variant::Scalar);
    }
  }
}
13218
// Drives the ELU tester (Scalar variant) for each beta in {0.3, 3.0}, over
// batch sizes from 1 through 30 in steps of 5.
TEST(F32_VELU__SCALAR_RR2_LUT16_P3_X6, beta) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x6);
  const std::vector<float> beta_values = {0.3f, 3.0f};
  for (const float value : beta_values) {
    for (size_t n = 1; n <= 30; n += 5) {
      VUnOpMicrokernelTester tester;
      tester.batch_size(n).beta(value);
      tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU, VUnOpMicrokernelTester::Variant::Scalar);
    }
  }
}
13229
// Drives the ELU tester (Scalar variant) once with a batch of exactly 1 element.
TEST(F32_VELU__SCALAR_RR2_P6_X1, batch_eq_1) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__scalar_rr2_p6_x1);
  VUnOpMicrokernelTester tester;
  tester.batch_size(1);
  tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU, VUnOpMicrokernelTester::Variant::Scalar);
}
13235
// Drives the ELU tester (Scalar variant) over every batch size from 2 up to
// (but excluding) 10.
TEST(F32_VELU__SCALAR_RR2_P6_X1, batch_gt_1) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__scalar_rr2_p6_x1);
  for (size_t n = 2; n < 10; ++n) {
    VUnOpMicrokernelTester tester;
    tester.batch_size(n);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU, VUnOpMicrokernelTester::Variant::Scalar);
  }
}
13243
// Drives the ELU tester (Scalar variant) with its inplace flag set to true,
// over every batch size from 1 through 5.
TEST(F32_VELU__SCALAR_RR2_P6_X1, inplace) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__scalar_rr2_p6_x1);
  for (size_t n = 1; n <= 5; ++n) {
    VUnOpMicrokernelTester tester;
    tester.batch_size(n).inplace(true);
    tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU, VUnOpMicrokernelTester::Variant::Scalar);
  }
}
13252
// Drives the ELU tester (Scalar variant) for each prescale in {0.1, 10.0},
// over every batch size from 1 through 5.
TEST(F32_VELU__SCALAR_RR2_P6_X1, prescale) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__scalar_rr2_p6_x1);
  const std::vector<float> prescale_values = {0.1f, 10.0f};
  for (const float value : prescale_values) {
    for (size_t n = 1; n <= 5; ++n) {
      VUnOpMicrokernelTester tester;
      tester.batch_size(n).prescale(value);
      tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU, VUnOpMicrokernelTester::Variant::Scalar);
    }
  }
}
13263
// Drives the ELU tester (Scalar variant) for each alpha in {0.3, 3.0}, over
// every batch size from 1 through 5.
TEST(F32_VELU__SCALAR_RR2_P6_X1, alpha) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__scalar_rr2_p6_x1);
  const std::vector<float> alpha_values = {0.3f, 3.0f};
  for (const float value : alpha_values) {
    for (size_t n = 1; n <= 5; ++n) {
      VUnOpMicrokernelTester tester;
      tester.batch_size(n).alpha(value);
      tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU, VUnOpMicrokernelTester::Variant::Scalar);
    }
  }
}
13274
// Drives the ELU tester (Scalar variant) for each beta in {0.3, 3.0}, over
// every batch size from 1 through 5.
TEST(F32_VELU__SCALAR_RR2_P6_X1, beta) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__scalar_rr2_p6_x1);
  const std::vector<float> beta_values = {0.3f, 3.0f};
  for (const float value : beta_values) {
    for (size_t n = 1; n <= 5; ++n) {
      VUnOpMicrokernelTester tester;
      tester.batch_size(n).beta(value);
      tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU, VUnOpMicrokernelTester::Variant::Scalar);
    }
  }
}
13285
// Drives the ELU tester (Scalar variant) once with a batch of exactly 2 elements.
TEST(F32_VELU__SCALAR_RR2_P6_X2, batch_eq_2) {
  const auto ukernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__scalar_rr2_p6_x2);
  VUnOpMicrokernelTester tester;
  tester.batch_size(2);
  tester.Test(ukernel, VUnOpMicrokernelTester::OpType::ELU, VUnOpMicrokernelTester::Variant::Scalar);
}
13291
// Runs xnn_f32_velu_ukernel__scalar_rr2_p6_x2 for batch sizes 4, 6, ..., 18
// (multiples of 2 below 20).
TEST(F32_VELU__SCALAR_RR2_P6_X2, batch_div_2) {
  using Tester = VUnOpMicrokernelTester;
  const auto elu_kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__scalar_rr2_p6_x2);
  for (size_t n = 4; n < 20; n += 2) {
    Tester().batch_size(n).Test(elu_kernel, Tester::OpType::ELU, Tester::Variant::Scalar);
  }
}
13299
// Runs xnn_f32_velu_ukernel__scalar_rr2_p6_x2 for every batch size below 2,
// which is the single size 1.
TEST(F32_VELU__SCALAR_RR2_P6_X2, batch_lt_2) {
  using Tester = VUnOpMicrokernelTester;
  const auto elu_kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__scalar_rr2_p6_x2);
  for (size_t n = 1; n < 2; ++n) {
    Tester().batch_size(n).Test(elu_kernel, Tester::OpType::ELU, Tester::Variant::Scalar);
  }
}
13307
// Runs xnn_f32_velu_ukernel__scalar_rr2_p6_x2 for batch sizes strictly
// between 2 and 4, which is the single size 3.
TEST(F32_VELU__SCALAR_RR2_P6_X2, batch_gt_2) {
  using Tester = VUnOpMicrokernelTester;
  const auto elu_kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__scalar_rr2_p6_x2);
  for (size_t n = 3; n < 4; ++n) {
    Tester().batch_size(n).Test(elu_kernel, Tester::OpType::ELU, Tester::Variant::Scalar);
  }
}
13315
// Runs xnn_f32_velu_ukernel__scalar_rr2_p6_x2 through the tester with
// inplace(true) for batch sizes 1 through 10.
TEST(F32_VELU__SCALAR_RR2_P6_X2, inplace) {
  using Tester = VUnOpMicrokernelTester;
  const auto elu_kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__scalar_rr2_p6_x2);
  for (size_t n = 1; n <= 10; ++n) {
    Tester()
        .batch_size(n)
        .inplace(true)
        .Test(elu_kernel, Tester::OpType::ELU, Tester::Variant::Scalar);
  }
}
13324
// Runs xnn_f32_velu_ukernel__scalar_rr2_p6_x2 with prescale set to 0.1 and
// then 10.0, each over batch sizes 1 through 10.
TEST(F32_VELU__SCALAR_RR2_P6_X2, prescale) {
  using Tester = VUnOpMicrokernelTester;
  const auto elu_kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__scalar_rr2_p6_x2);
  const float prescale_values[] = {0.1f, 10.0f};
  for (const float value : prescale_values) {
    for (size_t n = 1; n <= 10; ++n) {
      Tester()
          .batch_size(n)
          .prescale(value)
          .Test(elu_kernel, Tester::OpType::ELU, Tester::Variant::Scalar);
    }
  }
}
13335
// Runs xnn_f32_velu_ukernel__scalar_rr2_p6_x2 with alpha set to 0.3 and then
// 3.0, each over batch sizes 1 through 10.
TEST(F32_VELU__SCALAR_RR2_P6_X2, alpha) {
  using Tester = VUnOpMicrokernelTester;
  const auto elu_kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__scalar_rr2_p6_x2);
  const float alpha_values[] = {0.3f, 3.0f};
  for (const float value : alpha_values) {
    for (size_t n = 1; n <= 10; ++n) {
      Tester()
          .batch_size(n)
          .alpha(value)
          .Test(elu_kernel, Tester::OpType::ELU, Tester::Variant::Scalar);
    }
  }
}
13346
// Runs xnn_f32_velu_ukernel__scalar_rr2_p6_x2 with beta set to 0.3 and then
// 3.0, each over batch sizes 1 through 10.
TEST(F32_VELU__SCALAR_RR2_P6_X2, beta) {
  using Tester = VUnOpMicrokernelTester;
  const auto elu_kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__scalar_rr2_p6_x2);
  const float beta_values[] = {0.3f, 3.0f};
  for (const float value : beta_values) {
    for (size_t n = 1; n <= 10; ++n) {
      Tester()
          .batch_size(n)
          .beta(value)
          .Test(elu_kernel, Tester::OpType::ELU, Tester::Variant::Scalar);
    }
  }
}
13357
// Runs xnn_f32_velu_ukernel__scalar_rr2_p6_x3 once with batch_size set to 3.
TEST(F32_VELU__SCALAR_RR2_P6_X3, batch_eq_3) {
  using Tester = VUnOpMicrokernelTester;
  const auto elu_kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__scalar_rr2_p6_x3);
  Tester().batch_size(3).Test(elu_kernel, Tester::OpType::ELU, Tester::Variant::Scalar);
}
13363
// Runs xnn_f32_velu_ukernel__scalar_rr2_p6_x3 for batch sizes 6, 9, ..., 27
// (multiples of 3 below 30).
TEST(F32_VELU__SCALAR_RR2_P6_X3, batch_div_3) {
  using Tester = VUnOpMicrokernelTester;
  const auto elu_kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__scalar_rr2_p6_x3);
  for (size_t n = 6; n < 30; n += 3) {
    Tester().batch_size(n).Test(elu_kernel, Tester::OpType::ELU, Tester::Variant::Scalar);
  }
}
13371
// Runs xnn_f32_velu_ukernel__scalar_rr2_p6_x3 for batch sizes 1 and 2.
TEST(F32_VELU__SCALAR_RR2_P6_X3, batch_lt_3) {
  using Tester = VUnOpMicrokernelTester;
  const auto elu_kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__scalar_rr2_p6_x3);
  for (size_t n = 1; n < 3; ++n) {
    Tester().batch_size(n).Test(elu_kernel, Tester::OpType::ELU, Tester::Variant::Scalar);
  }
}
13379
// Runs xnn_f32_velu_ukernel__scalar_rr2_p6_x3 for batch sizes 4 and 5.
TEST(F32_VELU__SCALAR_RR2_P6_X3, batch_gt_3) {
  using Tester = VUnOpMicrokernelTester;
  const auto elu_kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__scalar_rr2_p6_x3);
  for (size_t n = 4; n < 6; ++n) {
    Tester().batch_size(n).Test(elu_kernel, Tester::OpType::ELU, Tester::Variant::Scalar);
  }
}
13387
// Runs xnn_f32_velu_ukernel__scalar_rr2_p6_x3 through the tester with
// inplace(true) for batch sizes 1, 3, 5, ..., 15.
TEST(F32_VELU__SCALAR_RR2_P6_X3, inplace) {
  using Tester = VUnOpMicrokernelTester;
  const auto elu_kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__scalar_rr2_p6_x3);
  for (size_t n = 1; n <= 15; n += 2) {
    Tester()
        .batch_size(n)
        .inplace(true)
        .Test(elu_kernel, Tester::OpType::ELU, Tester::Variant::Scalar);
  }
}
13396
// Runs xnn_f32_velu_ukernel__scalar_rr2_p6_x3 with prescale set to 0.1 and
// then 10.0, each over batch sizes 1, 3, 5, ..., 15.
TEST(F32_VELU__SCALAR_RR2_P6_X3, prescale) {
  using Tester = VUnOpMicrokernelTester;
  const auto elu_kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__scalar_rr2_p6_x3);
  const float prescale_values[] = {0.1f, 10.0f};
  for (const float value : prescale_values) {
    for (size_t n = 1; n <= 15; n += 2) {
      Tester()
          .batch_size(n)
          .prescale(value)
          .Test(elu_kernel, Tester::OpType::ELU, Tester::Variant::Scalar);
    }
  }
}
13407
// Runs xnn_f32_velu_ukernel__scalar_rr2_p6_x3 with alpha set to 0.3 and then
// 3.0, each over batch sizes 1, 3, 5, ..., 15.
TEST(F32_VELU__SCALAR_RR2_P6_X3, alpha) {
  using Tester = VUnOpMicrokernelTester;
  const auto elu_kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__scalar_rr2_p6_x3);
  const float alpha_values[] = {0.3f, 3.0f};
  for (const float value : alpha_values) {
    for (size_t n = 1; n <= 15; n += 2) {
      Tester()
          .batch_size(n)
          .alpha(value)
          .Test(elu_kernel, Tester::OpType::ELU, Tester::Variant::Scalar);
    }
  }
}
13418
// Runs xnn_f32_velu_ukernel__scalar_rr2_p6_x3 with beta set to 0.3 and then
// 3.0, each over batch sizes 1, 3, 5, ..., 15.
TEST(F32_VELU__SCALAR_RR2_P6_X3, beta) {
  using Tester = VUnOpMicrokernelTester;
  const auto elu_kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__scalar_rr2_p6_x3);
  const float beta_values[] = {0.3f, 3.0f};
  for (const float value : beta_values) {
    for (size_t n = 1; n <= 15; n += 2) {
      Tester()
          .batch_size(n)
          .beta(value)
          .Test(elu_kernel, Tester::OpType::ELU, Tester::Variant::Scalar);
    }
  }
}
13429
// Runs xnn_f32_velu_ukernel__scalar_rr2_p6_x4 once with batch_size set to 4.
TEST(F32_VELU__SCALAR_RR2_P6_X4, batch_eq_4) {
  using Tester = VUnOpMicrokernelTester;
  const auto elu_kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__scalar_rr2_p6_x4);
  Tester().batch_size(4).Test(elu_kernel, Tester::OpType::ELU, Tester::Variant::Scalar);
}
13435
// Runs xnn_f32_velu_ukernel__scalar_rr2_p6_x4 for batch sizes 8, 12, ..., 36
// (multiples of 4 below 40).
TEST(F32_VELU__SCALAR_RR2_P6_X4, batch_div_4) {
  using Tester = VUnOpMicrokernelTester;
  const auto elu_kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__scalar_rr2_p6_x4);
  for (size_t n = 8; n < 40; n += 4) {
    Tester().batch_size(n).Test(elu_kernel, Tester::OpType::ELU, Tester::Variant::Scalar);
  }
}
13443
// Runs xnn_f32_velu_ukernel__scalar_rr2_p6_x4 for batch sizes 1 through 3.
TEST(F32_VELU__SCALAR_RR2_P6_X4, batch_lt_4) {
  using Tester = VUnOpMicrokernelTester;
  const auto elu_kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__scalar_rr2_p6_x4);
  for (size_t n = 1; n < 4; ++n) {
    Tester().batch_size(n).Test(elu_kernel, Tester::OpType::ELU, Tester::Variant::Scalar);
  }
}
13451
// Runs xnn_f32_velu_ukernel__scalar_rr2_p6_x4 for batch sizes 5 through 7.
TEST(F32_VELU__SCALAR_RR2_P6_X4, batch_gt_4) {
  using Tester = VUnOpMicrokernelTester;
  const auto elu_kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__scalar_rr2_p6_x4);
  for (size_t n = 5; n < 8; ++n) {
    Tester().batch_size(n).Test(elu_kernel, Tester::OpType::ELU, Tester::Variant::Scalar);
  }
}
13459
// Runs xnn_f32_velu_ukernel__scalar_rr2_p6_x4 through the tester with
// inplace(true) for batch sizes 1, 4, 7, ..., 19.
TEST(F32_VELU__SCALAR_RR2_P6_X4, inplace) {
  using Tester = VUnOpMicrokernelTester;
  const auto elu_kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__scalar_rr2_p6_x4);
  for (size_t n = 1; n <= 20; n += 3) {
    Tester()
        .batch_size(n)
        .inplace(true)
        .Test(elu_kernel, Tester::OpType::ELU, Tester::Variant::Scalar);
  }
}
13468
// Runs xnn_f32_velu_ukernel__scalar_rr2_p6_x4 with prescale set to 0.1 and
// then 10.0, each over batch sizes 1, 4, 7, ..., 19.
TEST(F32_VELU__SCALAR_RR2_P6_X4, prescale) {
  using Tester = VUnOpMicrokernelTester;
  const auto elu_kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__scalar_rr2_p6_x4);
  const float prescale_values[] = {0.1f, 10.0f};
  for (const float value : prescale_values) {
    for (size_t n = 1; n <= 20; n += 3) {
      Tester()
          .batch_size(n)
          .prescale(value)
          .Test(elu_kernel, Tester::OpType::ELU, Tester::Variant::Scalar);
    }
  }
}
13479
// Runs xnn_f32_velu_ukernel__scalar_rr2_p6_x4 with alpha set to 0.3 and then
// 3.0, each over batch sizes 1, 4, 7, ..., 19.
TEST(F32_VELU__SCALAR_RR2_P6_X4, alpha) {
  using Tester = VUnOpMicrokernelTester;
  const auto elu_kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__scalar_rr2_p6_x4);
  const float alpha_values[] = {0.3f, 3.0f};
  for (const float value : alpha_values) {
    for (size_t n = 1; n <= 20; n += 3) {
      Tester()
          .batch_size(n)
          .alpha(value)
          .Test(elu_kernel, Tester::OpType::ELU, Tester::Variant::Scalar);
    }
  }
}
13490
// Runs xnn_f32_velu_ukernel__scalar_rr2_p6_x4 with beta set to 0.3 and then
// 3.0, each over batch sizes 1, 4, 7, ..., 19.
TEST(F32_VELU__SCALAR_RR2_P6_X4, beta) {
  using Tester = VUnOpMicrokernelTester;
  const auto elu_kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__scalar_rr2_p6_x4);
  const float beta_values[] = {0.3f, 3.0f};
  for (const float value : beta_values) {
    for (size_t n = 1; n <= 20; n += 3) {
      Tester()
          .batch_size(n)
          .beta(value)
          .Test(elu_kernel, Tester::OpType::ELU, Tester::Variant::Scalar);
    }
  }
}
13501
// Runs xnn_f32_velu_ukernel__scalar_rr2_p6_x5 once with batch_size set to 5.
TEST(F32_VELU__SCALAR_RR2_P6_X5, batch_eq_5) {
  using Tester = VUnOpMicrokernelTester;
  const auto elu_kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__scalar_rr2_p6_x5);
  Tester().batch_size(5).Test(elu_kernel, Tester::OpType::ELU, Tester::Variant::Scalar);
}
13507
// Runs xnn_f32_velu_ukernel__scalar_rr2_p6_x5 for batch sizes 10, 15, ..., 45
// (multiples of 5 below 50).
TEST(F32_VELU__SCALAR_RR2_P6_X5, batch_div_5) {
  using Tester = VUnOpMicrokernelTester;
  const auto elu_kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__scalar_rr2_p6_x5);
  for (size_t n = 10; n < 50; n += 5) {
    Tester().batch_size(n).Test(elu_kernel, Tester::OpType::ELU, Tester::Variant::Scalar);
  }
}
13515
// Runs xnn_f32_velu_ukernel__scalar_rr2_p6_x5 for batch sizes 1 through 4.
TEST(F32_VELU__SCALAR_RR2_P6_X5, batch_lt_5) {
  using Tester = VUnOpMicrokernelTester;
  const auto elu_kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__scalar_rr2_p6_x5);
  for (size_t n = 1; n < 5; ++n) {
    Tester().batch_size(n).Test(elu_kernel, Tester::OpType::ELU, Tester::Variant::Scalar);
  }
}
13523
// Runs xnn_f32_velu_ukernel__scalar_rr2_p6_x5 for batch sizes 6 through 9.
TEST(F32_VELU__SCALAR_RR2_P6_X5, batch_gt_5) {
  using Tester = VUnOpMicrokernelTester;
  const auto elu_kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__scalar_rr2_p6_x5);
  for (size_t n = 6; n < 10; ++n) {
    Tester().batch_size(n).Test(elu_kernel, Tester::OpType::ELU, Tester::Variant::Scalar);
  }
}
13531
// Runs xnn_f32_velu_ukernel__scalar_rr2_p6_x5 through the tester with
// inplace(true) for batch sizes 1, 5, 9, ..., 25.
TEST(F32_VELU__SCALAR_RR2_P6_X5, inplace) {
  using Tester = VUnOpMicrokernelTester;
  const auto elu_kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__scalar_rr2_p6_x5);
  for (size_t n = 1; n <= 25; n += 4) {
    Tester()
        .batch_size(n)
        .inplace(true)
        .Test(elu_kernel, Tester::OpType::ELU, Tester::Variant::Scalar);
  }
}
13540
// Runs xnn_f32_velu_ukernel__scalar_rr2_p6_x5 with prescale set to 0.1 and
// then 10.0, each over batch sizes 1, 5, 9, ..., 25.
TEST(F32_VELU__SCALAR_RR2_P6_X5, prescale) {
  using Tester = VUnOpMicrokernelTester;
  const auto elu_kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__scalar_rr2_p6_x5);
  const float prescale_values[] = {0.1f, 10.0f};
  for (const float value : prescale_values) {
    for (size_t n = 1; n <= 25; n += 4) {
      Tester()
          .batch_size(n)
          .prescale(value)
          .Test(elu_kernel, Tester::OpType::ELU, Tester::Variant::Scalar);
    }
  }
}
13551
// Runs xnn_f32_velu_ukernel__scalar_rr2_p6_x5 with alpha set to 0.3 and then
// 3.0, each over batch sizes 1, 5, 9, ..., 25.
TEST(F32_VELU__SCALAR_RR2_P6_X5, alpha) {
  using Tester = VUnOpMicrokernelTester;
  const auto elu_kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__scalar_rr2_p6_x5);
  const float alpha_values[] = {0.3f, 3.0f};
  for (const float value : alpha_values) {
    for (size_t n = 1; n <= 25; n += 4) {
      Tester()
          .batch_size(n)
          .alpha(value)
          .Test(elu_kernel, Tester::OpType::ELU, Tester::Variant::Scalar);
    }
  }
}
13562
// Runs xnn_f32_velu_ukernel__scalar_rr2_p6_x5 with beta set to 0.3 and then
// 3.0, each over batch sizes 1, 5, 9, ..., 25.
TEST(F32_VELU__SCALAR_RR2_P6_X5, beta) {
  using Tester = VUnOpMicrokernelTester;
  const auto elu_kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__scalar_rr2_p6_x5);
  const float beta_values[] = {0.3f, 3.0f};
  for (const float value : beta_values) {
    for (size_t n = 1; n <= 25; n += 4) {
      Tester()
          .batch_size(n)
          .beta(value)
          .Test(elu_kernel, Tester::OpType::ELU, Tester::Variant::Scalar);
    }
  }
}
13573
// Runs xnn_f32_velu_ukernel__scalar_rr2_p6_x6 once with batch_size set to 6.
TEST(F32_VELU__SCALAR_RR2_P6_X6, batch_eq_6) {
  using Tester = VUnOpMicrokernelTester;
  const auto elu_kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__scalar_rr2_p6_x6);
  Tester().batch_size(6).Test(elu_kernel, Tester::OpType::ELU, Tester::Variant::Scalar);
}
13579
// Runs xnn_f32_velu_ukernel__scalar_rr2_p6_x6 for batch sizes 12, 18, ..., 54
// (multiples of 6 below 60).
TEST(F32_VELU__SCALAR_RR2_P6_X6, batch_div_6) {
  using Tester = VUnOpMicrokernelTester;
  const auto elu_kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__scalar_rr2_p6_x6);
  for (size_t n = 12; n < 60; n += 6) {
    Tester().batch_size(n).Test(elu_kernel, Tester::OpType::ELU, Tester::Variant::Scalar);
  }
}
13587
// Runs xnn_f32_velu_ukernel__scalar_rr2_p6_x6 for batch sizes 1 through 5.
TEST(F32_VELU__SCALAR_RR2_P6_X6, batch_lt_6) {
  using Tester = VUnOpMicrokernelTester;
  const auto elu_kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__scalar_rr2_p6_x6);
  for (size_t n = 1; n < 6; ++n) {
    Tester().batch_size(n).Test(elu_kernel, Tester::OpType::ELU, Tester::Variant::Scalar);
  }
}
13595
// Runs xnn_f32_velu_ukernel__scalar_rr2_p6_x6 for batch sizes 7 through 11.
TEST(F32_VELU__SCALAR_RR2_P6_X6, batch_gt_6) {
  using Tester = VUnOpMicrokernelTester;
  const auto elu_kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__scalar_rr2_p6_x6);
  for (size_t n = 7; n < 12; ++n) {
    Tester().batch_size(n).Test(elu_kernel, Tester::OpType::ELU, Tester::Variant::Scalar);
  }
}
13603
// Runs xnn_f32_velu_ukernel__scalar_rr2_p6_x6 through the tester with
// inplace(true) for batch sizes 1, 6, 11, ..., 26.
TEST(F32_VELU__SCALAR_RR2_P6_X6, inplace) {
  using Tester = VUnOpMicrokernelTester;
  const auto elu_kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__scalar_rr2_p6_x6);
  for (size_t n = 1; n <= 30; n += 5) {
    Tester()
        .batch_size(n)
        .inplace(true)
        .Test(elu_kernel, Tester::OpType::ELU, Tester::Variant::Scalar);
  }
}
13612
// Runs xnn_f32_velu_ukernel__scalar_rr2_p6_x6 with prescale set to 0.1 and
// then 10.0, each over batch sizes 1, 6, 11, ..., 26.
TEST(F32_VELU__SCALAR_RR2_P6_X6, prescale) {
  using Tester = VUnOpMicrokernelTester;
  const auto elu_kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__scalar_rr2_p6_x6);
  const float prescale_values[] = {0.1f, 10.0f};
  for (const float value : prescale_values) {
    for (size_t n = 1; n <= 30; n += 5) {
      Tester()
          .batch_size(n)
          .prescale(value)
          .Test(elu_kernel, Tester::OpType::ELU, Tester::Variant::Scalar);
    }
  }
}
13623
// Runs xnn_f32_velu_ukernel__scalar_rr2_p6_x6 with alpha set to 0.3 and then
// 3.0, each over batch sizes 1, 6, 11, ..., 26.
TEST(F32_VELU__SCALAR_RR2_P6_X6, alpha) {
  using Tester = VUnOpMicrokernelTester;
  const auto elu_kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__scalar_rr2_p6_x6);
  const float alpha_values[] = {0.3f, 3.0f};
  for (const float value : alpha_values) {
    for (size_t n = 1; n <= 30; n += 5) {
      Tester()
          .batch_size(n)
          .alpha(value)
          .Test(elu_kernel, Tester::OpType::ELU, Tester::Variant::Scalar);
    }
  }
}
13634
// Runs xnn_f32_velu_ukernel__scalar_rr2_p6_x6 with beta set to 0.3 and then
// 3.0, each over batch sizes 1, 6, 11, ..., 26.
TEST(F32_VELU__SCALAR_RR2_P6_X6, beta) {
  using Tester = VUnOpMicrokernelTester;
  const auto elu_kernel = xnn_f32_vunary_ukernel_function(xnn_f32_velu_ukernel__scalar_rr2_p6_x6);
  const float beta_values[] = {0.3f, 3.0f};
  for (const float value : beta_values) {
    for (size_t n = 1; n <= 30; n += 5) {
      Tester()
          .batch_size(n)
          .beta(value)
          .Test(elu_kernel, Tester::OpType::ELU, Tester::Variant::Scalar);
    }
  }
}