1 // Copyright (c) Facebook, Inc. and its affiliates. 2 // All rights reserved. 3 // 4 // Copyright 2019 Google LLC 5 // 6 // This source code is licensed under the BSD-style license found in the 7 // LICENSE file in the root directory of this source tree. 8 // 9 // Auto-generated file. Do not edit! 10 // Specification: test/f16-dwconv-minmax.yaml 11 // Generator: tools/generate-dwconv-test.py 12 13 14 #include <gtest/gtest.h> 15 16 #include <xnnpack/common.h> 17 #include <xnnpack/isa-checks.h> 18 19 #include <xnnpack/dwconv.h> 20 #include "dwconv-microkernel-tester.h" 21 22 23 #if XNN_ARCH_ARM64 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH,c_eq_8)24 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH, c_eq_8) { 25 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 26 DWConvMicrokernelTester() 27 .cr(8) 28 .kr(25) 29 .channels(8) 30 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith); 31 } 32 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH,c_div_8)33 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH, c_div_8) { 34 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 35 for (uint32_t channels = 16; channels < 128; channels += 24) { 36 DWConvMicrokernelTester() 37 .cr(8) 38 .kr(25) 39 .channels(channels) 40 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith); 41 } 42 } 43 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH,c_div_8_with_qmin)44 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH, c_div_8_with_qmin) { 45 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 46 for (uint32_t channels = 16; channels < 128; channels += 24) { 47 DWConvMicrokernelTester() 48 .cr(8) 49 .kr(25) 50 .channels(channels) 51 .qmin(128) 52 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith); 53 } 54 } 55 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH,c_div_8_with_qmax)56 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH, c_div_8_with_qmax) { 57 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 58 for (uint32_t channels = 16; channels < 128; channels += 24) { 59 DWConvMicrokernelTester() 60 .cr(8) 61 .kr(25) 62 .channels(channels) 63 .qmax(128) 64 
.Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith); 65 } 66 } 67 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH,c_lt_8)68 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH, c_lt_8) { 69 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 70 for (uint32_t channels = 1; channels < 8; channels++) { 71 DWConvMicrokernelTester() 72 .cr(8) 73 .kr(25) 74 .channels(channels) 75 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith); 76 } 77 } 78 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH,c_gt_8)79 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH, c_gt_8) { 80 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 81 for (uint32_t channels = 9; channels < 16; channels++) { 82 DWConvMicrokernelTester() 83 .cr(8) 84 .kr(25) 85 .channels(channels) 86 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith); 87 } 88 } 89 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH,c_gt_8_with_qmin)90 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH, c_gt_8_with_qmin) { 91 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 92 for (uint32_t channels = 9; channels < 16; channels++) { 93 DWConvMicrokernelTester() 94 .cr(8) 95 .kr(25) 96 .channels(channels) 97 .qmin(128) 98 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith); 99 } 100 } 101 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH,c_gt_8_with_qmax)102 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH, c_gt_8_with_qmax) { 103 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 104 for (uint32_t channels = 9; channels < 16; channels++) { 105 DWConvMicrokernelTester() 106 .cr(8) 107 .kr(25) 108 .channels(channels) 109 .qmax(128) 110 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith); 111 } 112 } 113 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH,multipixel)114 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH, multipixel) { 115 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 116 for (size_t channels = 1; channels <= 40; channels += 7) { 117 DWConvMicrokernelTester() 118 .cr(8) 119 .kr(25) 120 .channels(channels) 121 .width(3) 122 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith); 123 } 124 } 125 
TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH,multipixel_with_step)126 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH, multipixel_with_step) { 127 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 128 for (size_t channels = 1; channels <= 40; channels += 7) { 129 for (size_t step = 2; step <= 25; step++) { 130 DWConvMicrokernelTester() 131 .cr(8) 132 .kr(25) 133 .channels(channels) 134 .width(3) 135 .step(step) 136 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith); 137 } 138 } 139 } 140 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH,multipixel_with_output_stride)141 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH, multipixel_with_output_stride) { 142 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 143 for (size_t channels = 1; channels <= 40; channels += 7) { 144 DWConvMicrokernelTester() 145 .cr(8) 146 .kr(25) 147 .channels(8) 148 .width(5) 149 .output_stride(43) 150 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith); 151 } 152 } 153 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH,multipixel_with_qmin)154 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH, multipixel_with_qmin) { 155 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 156 for (size_t channels = 1; channels <= 40; channels += 7) { 157 DWConvMicrokernelTester() 158 .cr(8) 159 .kr(25) 160 .channels(channels) 161 .width(3) 162 .qmin(128) 163 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith); 164 } 165 } 166 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH,multipixel_with_qmax)167 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH, multipixel_with_qmax) { 168 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 169 for (size_t channels = 1; channels <= 40; channels += 7) { 170 DWConvMicrokernelTester() 171 .cr(8) 172 .kr(25) 173 .channels(channels) 174 .width(3) 175 .qmax(128) 176 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith); 177 } 178 } 179 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH,input_offset)180 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH, input_offset) { 181 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 182 for (uint32_t channels = 16; channels < 128; 
channels += 24) { 183 DWConvMicrokernelTester() 184 .cr(8) 185 .kr(25) 186 .channels(channels) 187 .input_offset(176) 188 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith); 189 } 190 } 191 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH,zero)192 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH, zero) { 193 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 194 for (uint32_t mz = 0; mz < 25; mz++) { 195 for (uint32_t channels = 16; channels < 128; channels += 24) { 196 DWConvMicrokernelTester() 197 .cr(8) 198 .kr(25) 199 .channels(channels) 200 .input_offset(176) 201 .zero_index(mz) 202 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith); 203 } 204 } 205 } 206 #endif // XNN_ARCH_ARM64 207 208 209 #if XNN_ARCH_ARM64 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH_ACC2,c_eq_8)210 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH_ACC2, c_eq_8) { 211 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 212 DWConvMicrokernelTester() 213 .cr(8) 214 .kr(25) 215 .channels(8) 216 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith_acc2); 217 } 218 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH_ACC2,c_div_8)219 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH_ACC2, c_div_8) { 220 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 221 for (uint32_t channels = 16; channels < 128; channels += 24) { 222 DWConvMicrokernelTester() 223 .cr(8) 224 .kr(25) 225 .channels(channels) 226 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith_acc2); 227 } 228 } 229 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH_ACC2,c_div_8_with_qmin)230 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH_ACC2, c_div_8_with_qmin) { 231 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 232 for (uint32_t channels = 16; channels < 128; channels += 24) { 233 DWConvMicrokernelTester() 234 .cr(8) 235 .kr(25) 236 .channels(channels) 237 .qmin(128) 238 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith_acc2); 239 } 240 } 241 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH_ACC2,c_div_8_with_qmax)242 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH_ACC2, c_div_8_with_qmax) { 243 
TEST_REQUIRES_ARM_NEON_FP16_ARITH; 244 for (uint32_t channels = 16; channels < 128; channels += 24) { 245 DWConvMicrokernelTester() 246 .cr(8) 247 .kr(25) 248 .channels(channels) 249 .qmax(128) 250 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith_acc2); 251 } 252 } 253 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH_ACC2,c_lt_8)254 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH_ACC2, c_lt_8) { 255 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 256 for (uint32_t channels = 1; channels < 8; channels++) { 257 DWConvMicrokernelTester() 258 .cr(8) 259 .kr(25) 260 .channels(channels) 261 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith_acc2); 262 } 263 } 264 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH_ACC2,c_gt_8)265 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH_ACC2, c_gt_8) { 266 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 267 for (uint32_t channels = 9; channels < 16; channels++) { 268 DWConvMicrokernelTester() 269 .cr(8) 270 .kr(25) 271 .channels(channels) 272 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith_acc2); 273 } 274 } 275 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH_ACC2,c_gt_8_with_qmin)276 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH_ACC2, c_gt_8_with_qmin) { 277 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 278 for (uint32_t channels = 9; channels < 16; channels++) { 279 DWConvMicrokernelTester() 280 .cr(8) 281 .kr(25) 282 .channels(channels) 283 .qmin(128) 284 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith_acc2); 285 } 286 } 287 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH_ACC2,c_gt_8_with_qmax)288 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH_ACC2, c_gt_8_with_qmax) { 289 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 290 for (uint32_t channels = 9; channels < 16; channels++) { 291 DWConvMicrokernelTester() 292 .cr(8) 293 .kr(25) 294 .channels(channels) 295 .qmax(128) 296 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith_acc2); 297 } 298 } 299 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH_ACC2,multipixel)300 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH_ACC2, 
multipixel) { 301 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 302 for (size_t channels = 1; channels <= 40; channels += 7) { 303 DWConvMicrokernelTester() 304 .cr(8) 305 .kr(25) 306 .channels(channels) 307 .width(3) 308 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith_acc2); 309 } 310 } 311 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH_ACC2,multipixel_with_step)312 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH_ACC2, multipixel_with_step) { 313 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 314 for (size_t channels = 1; channels <= 40; channels += 7) { 315 for (size_t step = 2; step <= 25; step++) { 316 DWConvMicrokernelTester() 317 .cr(8) 318 .kr(25) 319 .channels(channels) 320 .width(3) 321 .step(step) 322 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith_acc2); 323 } 324 } 325 } 326 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH_ACC2,multipixel_with_output_stride)327 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH_ACC2, multipixel_with_output_stride) { 328 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 329 for (size_t channels = 1; channels <= 40; channels += 7) { 330 DWConvMicrokernelTester() 331 .cr(8) 332 .kr(25) 333 .channels(8) 334 .width(5) 335 .output_stride(43) 336 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith_acc2); 337 } 338 } 339 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH_ACC2,multipixel_with_qmin)340 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH_ACC2, multipixel_with_qmin) { 341 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 342 for (size_t channels = 1; channels <= 40; channels += 7) { 343 DWConvMicrokernelTester() 344 .cr(8) 345 .kr(25) 346 .channels(channels) 347 .width(3) 348 .qmin(128) 349 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith_acc2); 350 } 351 } 352 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH_ACC2,multipixel_with_qmax)353 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH_ACC2, multipixel_with_qmax) { 354 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 355 for (size_t channels = 1; channels <= 40; channels += 7) { 356 DWConvMicrokernelTester() 357 .cr(8) 358 .kr(25) 
359 .channels(channels) 360 .width(3) 361 .qmax(128) 362 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith_acc2); 363 } 364 } 365 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH_ACC2,input_offset)366 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH_ACC2, input_offset) { 367 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 368 for (uint32_t channels = 16; channels < 128; channels += 24) { 369 DWConvMicrokernelTester() 370 .cr(8) 371 .kr(25) 372 .channels(channels) 373 .input_offset(176) 374 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith_acc2); 375 } 376 } 377 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH_ACC2,zero)378 TEST(F16_DWCONV_MINMAX_UP8X25__NEONFP16ARITH_ACC2, zero) { 379 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 380 for (uint32_t mz = 0; mz < 25; mz++) { 381 for (uint32_t channels = 16; channels < 128; channels += 24) { 382 DWConvMicrokernelTester() 383 .cr(8) 384 .kr(25) 385 .channels(channels) 386 .input_offset(176) 387 .zero_index(mz) 388 .Test(xnn_f16_dwconv_minmax_ukernel_up8x25__neonfp16arith_acc2); 389 } 390 } 391 } 392 #endif // XNN_ARCH_ARM64 393 394 395 #if XNN_ARCH_ARM64 TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH,c_eq_16)396 TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH, c_eq_16) { 397 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 398 DWConvMicrokernelTester() 399 .cr(16) 400 .kr(25) 401 .channels(16) 402 .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith); 403 } 404 TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH,c_div_16)405 TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH, c_div_16) { 406 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 407 for (uint32_t channels = 32; channels < 256; channels += 48) { 408 DWConvMicrokernelTester() 409 .cr(16) 410 .kr(25) 411 .channels(channels) 412 .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith); 413 } 414 } 415 TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH,c_div_16_with_qmin)416 TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH, c_div_16_with_qmin) { 417 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 418 for (uint32_t channels = 32; 
channels < 256; channels += 48) { 419 DWConvMicrokernelTester() 420 .cr(16) 421 .kr(25) 422 .channels(channels) 423 .qmin(128) 424 .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith); 425 } 426 } 427 TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH,c_div_16_with_qmax)428 TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH, c_div_16_with_qmax) { 429 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 430 for (uint32_t channels = 32; channels < 256; channels += 48) { 431 DWConvMicrokernelTester() 432 .cr(16) 433 .kr(25) 434 .channels(channels) 435 .qmax(128) 436 .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith); 437 } 438 } 439 TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH,c_lt_16)440 TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH, c_lt_16) { 441 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 442 for (uint32_t channels = 1; channels < 16; channels++) { 443 DWConvMicrokernelTester() 444 .cr(16) 445 .kr(25) 446 .channels(channels) 447 .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith); 448 } 449 } 450 TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH,c_gt_16)451 TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH, c_gt_16) { 452 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 453 for (uint32_t channels = 17; channels < 32; channels++) { 454 DWConvMicrokernelTester() 455 .cr(16) 456 .kr(25) 457 .channels(channels) 458 .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith); 459 } 460 } 461 TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH,c_gt_16_with_qmin)462 TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH, c_gt_16_with_qmin) { 463 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 464 for (uint32_t channels = 17; channels < 32; channels++) { 465 DWConvMicrokernelTester() 466 .cr(16) 467 .kr(25) 468 .channels(channels) 469 .qmin(128) 470 .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith); 471 } 472 } 473 TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH,c_gt_16_with_qmax)474 TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH, c_gt_16_with_qmax) { 475 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 476 for (uint32_t channels = 17; 
channels < 32; channels++) { 477 DWConvMicrokernelTester() 478 .cr(16) 479 .kr(25) 480 .channels(channels) 481 .qmax(128) 482 .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith); 483 } 484 } 485 TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH,multipixel)486 TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH, multipixel) { 487 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 488 for (size_t channels = 1; channels <= 80; channels += 15) { 489 DWConvMicrokernelTester() 490 .cr(16) 491 .kr(25) 492 .channels(channels) 493 .width(3) 494 .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith); 495 } 496 } 497 TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH,multipixel_with_step)498 TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH, multipixel_with_step) { 499 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 500 for (size_t channels = 1; channels <= 80; channels += 15) { 501 for (size_t step = 2; step <= 25; step++) { 502 DWConvMicrokernelTester() 503 .cr(16) 504 .kr(25) 505 .channels(channels) 506 .width(3) 507 .step(step) 508 .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith); 509 } 510 } 511 } 512 TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH,multipixel_with_output_stride)513 TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH, multipixel_with_output_stride) { 514 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 515 for (size_t channels = 1; channels <= 80; channels += 15) { 516 DWConvMicrokernelTester() 517 .cr(16) 518 .kr(25) 519 .channels(16) 520 .width(5) 521 .output_stride(83) 522 .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith); 523 } 524 } 525 TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH,multipixel_with_qmin)526 TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH, multipixel_with_qmin) { 527 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 528 for (size_t channels = 1; channels <= 80; channels += 15) { 529 DWConvMicrokernelTester() 530 .cr(16) 531 .kr(25) 532 .channels(channels) 533 .width(3) 534 .qmin(128) 535 .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith); 536 } 537 } 538 
TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH,multipixel_with_qmax)539 TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH, multipixel_with_qmax) { 540 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 541 for (size_t channels = 1; channels <= 80; channels += 15) { 542 DWConvMicrokernelTester() 543 .cr(16) 544 .kr(25) 545 .channels(channels) 546 .width(3) 547 .qmax(128) 548 .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith); 549 } 550 } 551 TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH,input_offset)552 TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH, input_offset) { 553 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 554 for (uint32_t channels = 32; channels < 256; channels += 48) { 555 DWConvMicrokernelTester() 556 .cr(16) 557 .kr(25) 558 .channels(channels) 559 .input_offset(304) 560 .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith); 561 } 562 } 563 TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH,zero)564 TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH, zero) { 565 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 566 for (uint32_t mz = 0; mz < 25; mz++) { 567 for (uint32_t channels = 32; channels < 256; channels += 48) { 568 DWConvMicrokernelTester() 569 .cr(16) 570 .kr(25) 571 .channels(channels) 572 .input_offset(304) 573 .zero_index(mz) 574 .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith); 575 } 576 } 577 } 578 #endif // XNN_ARCH_ARM64 579 580 581 #if XNN_ARCH_ARM64 TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH_ACC2,c_eq_16)582 TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH_ACC2, c_eq_16) { 583 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 584 DWConvMicrokernelTester() 585 .cr(16) 586 .kr(25) 587 .channels(16) 588 .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith_acc2); 589 } 590 TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH_ACC2,c_div_16)591 TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH_ACC2, c_div_16) { 592 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 593 for (uint32_t channels = 32; channels < 256; channels += 48) { 594 DWConvMicrokernelTester() 595 .cr(16) 596 .kr(25) 597 .channels(channels) 
598 .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith_acc2); 599 } 600 } 601 TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH_ACC2,c_div_16_with_qmin)602 TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH_ACC2, c_div_16_with_qmin) { 603 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 604 for (uint32_t channels = 32; channels < 256; channels += 48) { 605 DWConvMicrokernelTester() 606 .cr(16) 607 .kr(25) 608 .channels(channels) 609 .qmin(128) 610 .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith_acc2); 611 } 612 } 613 TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH_ACC2,c_div_16_with_qmax)614 TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH_ACC2, c_div_16_with_qmax) { 615 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 616 for (uint32_t channels = 32; channels < 256; channels += 48) { 617 DWConvMicrokernelTester() 618 .cr(16) 619 .kr(25) 620 .channels(channels) 621 .qmax(128) 622 .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith_acc2); 623 } 624 } 625 TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH_ACC2,c_lt_16)626 TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH_ACC2, c_lt_16) { 627 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 628 for (uint32_t channels = 1; channels < 16; channels++) { 629 DWConvMicrokernelTester() 630 .cr(16) 631 .kr(25) 632 .channels(channels) 633 .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith_acc2); 634 } 635 } 636 TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH_ACC2,c_gt_16)637 TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH_ACC2, c_gt_16) { 638 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 639 for (uint32_t channels = 17; channels < 32; channels++) { 640 DWConvMicrokernelTester() 641 .cr(16) 642 .kr(25) 643 .channels(channels) 644 .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith_acc2); 645 } 646 } 647 TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH_ACC2,c_gt_16_with_qmin)648 TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH_ACC2, c_gt_16_with_qmin) { 649 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 650 for (uint32_t channels = 17; channels < 32; channels++) { 651 
DWConvMicrokernelTester() 652 .cr(16) 653 .kr(25) 654 .channels(channels) 655 .qmin(128) 656 .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith_acc2); 657 } 658 } 659 TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH_ACC2,c_gt_16_with_qmax)660 TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH_ACC2, c_gt_16_with_qmax) { 661 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 662 for (uint32_t channels = 17; channels < 32; channels++) { 663 DWConvMicrokernelTester() 664 .cr(16) 665 .kr(25) 666 .channels(channels) 667 .qmax(128) 668 .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith_acc2); 669 } 670 } 671 TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH_ACC2,multipixel)672 TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH_ACC2, multipixel) { 673 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 674 for (size_t channels = 1; channels <= 80; channels += 15) { 675 DWConvMicrokernelTester() 676 .cr(16) 677 .kr(25) 678 .channels(channels) 679 .width(3) 680 .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith_acc2); 681 } 682 } 683 TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH_ACC2,multipixel_with_step)684 TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH_ACC2, multipixel_with_step) { 685 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 686 for (size_t channels = 1; channels <= 80; channels += 15) { 687 for (size_t step = 2; step <= 25; step++) { 688 DWConvMicrokernelTester() 689 .cr(16) 690 .kr(25) 691 .channels(channels) 692 .width(3) 693 .step(step) 694 .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith_acc2); 695 } 696 } 697 } 698 TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH_ACC2,multipixel_with_output_stride)699 TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH_ACC2, multipixel_with_output_stride) { 700 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 701 for (size_t channels = 1; channels <= 80; channels += 15) { 702 DWConvMicrokernelTester() 703 .cr(16) 704 .kr(25) 705 .channels(16) 706 .width(5) 707 .output_stride(83) 708 .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith_acc2); 709 } 710 } 711 
TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH_ACC2,multipixel_with_qmin)712 TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH_ACC2, multipixel_with_qmin) { 713 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 714 for (size_t channels = 1; channels <= 80; channels += 15) { 715 DWConvMicrokernelTester() 716 .cr(16) 717 .kr(25) 718 .channels(channels) 719 .width(3) 720 .qmin(128) 721 .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith_acc2); 722 } 723 } 724 TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH_ACC2,multipixel_with_qmax)725 TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH_ACC2, multipixel_with_qmax) { 726 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 727 for (size_t channels = 1; channels <= 80; channels += 15) { 728 DWConvMicrokernelTester() 729 .cr(16) 730 .kr(25) 731 .channels(channels) 732 .width(3) 733 .qmax(128) 734 .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith_acc2); 735 } 736 } 737 TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH_ACC2,input_offset)738 TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH_ACC2, input_offset) { 739 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 740 for (uint32_t channels = 32; channels < 256; channels += 48) { 741 DWConvMicrokernelTester() 742 .cr(16) 743 .kr(25) 744 .channels(channels) 745 .input_offset(304) 746 .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith_acc2); 747 } 748 } 749 TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH_ACC2,zero)750 TEST(F16_DWCONV_MINMAX_UP16X25__NEONFP16ARITH_ACC2, zero) { 751 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 752 for (uint32_t mz = 0; mz < 25; mz++) { 753 for (uint32_t channels = 32; channels < 256; channels += 48) { 754 DWConvMicrokernelTester() 755 .cr(16) 756 .kr(25) 757 .channels(channels) 758 .input_offset(304) 759 .zero_index(mz) 760 .Test(xnn_f16_dwconv_minmax_ukernel_up16x25__neonfp16arith_acc2); 761 } 762 } 763 } 764 #endif // XNN_ARCH_ARM64 765 766 767 #if XNN_ARCH_ARM64 TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH,c_eq_8)768 TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH, c_eq_8) { 769 
TEST_REQUIRES_ARM_NEON_FP16_ARITH; 770 DWConvMicrokernelTester() 771 .cr(8) 772 .kr(9) 773 .channels(8) 774 .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith); 775 } 776 TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH,c_div_8)777 TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH, c_div_8) { 778 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 779 for (uint32_t channels = 16; channels < 128; channels += 24) { 780 DWConvMicrokernelTester() 781 .cr(8) 782 .kr(9) 783 .channels(channels) 784 .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith); 785 } 786 } 787 TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH,c_div_8_with_qmin)788 TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH, c_div_8_with_qmin) { 789 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 790 for (uint32_t channels = 16; channels < 128; channels += 24) { 791 DWConvMicrokernelTester() 792 .cr(8) 793 .kr(9) 794 .channels(channels) 795 .qmin(128) 796 .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith); 797 } 798 } 799 TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH,c_div_8_with_qmax)800 TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH, c_div_8_with_qmax) { 801 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 802 for (uint32_t channels = 16; channels < 128; channels += 24) { 803 DWConvMicrokernelTester() 804 .cr(8) 805 .kr(9) 806 .channels(channels) 807 .qmax(128) 808 .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith); 809 } 810 } 811 TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH,c_lt_8)812 TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH, c_lt_8) { 813 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 814 for (uint32_t channels = 1; channels < 8; channels++) { 815 DWConvMicrokernelTester() 816 .cr(8) 817 .kr(9) 818 .channels(channels) 819 .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith); 820 } 821 } 822 TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH,c_gt_8)823 TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH, c_gt_8) { 824 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 825 for (uint32_t channels = 9; channels < 16; channels++) { 826 DWConvMicrokernelTester() 827 .cr(8) 828 .kr(9) 829 
.channels(channels) 830 .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith); 831 } 832 } 833 TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH,c_gt_8_with_qmin)834 TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH, c_gt_8_with_qmin) { 835 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 836 for (uint32_t channels = 9; channels < 16; channels++) { 837 DWConvMicrokernelTester() 838 .cr(8) 839 .kr(9) 840 .channels(channels) 841 .qmin(128) 842 .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith); 843 } 844 } 845 TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH,c_gt_8_with_qmax)846 TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH, c_gt_8_with_qmax) { 847 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 848 for (uint32_t channels = 9; channels < 16; channels++) { 849 DWConvMicrokernelTester() 850 .cr(8) 851 .kr(9) 852 .channels(channels) 853 .qmax(128) 854 .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith); 855 } 856 } 857 TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH,multipixel)858 TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH, multipixel) { 859 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 860 for (size_t channels = 1; channels <= 40; channels += 7) { 861 DWConvMicrokernelTester() 862 .cr(8) 863 .kr(9) 864 .channels(channels) 865 .width(3) 866 .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith); 867 } 868 } 869 TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH,multipixel_with_step)870 TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH, multipixel_with_step) { 871 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 872 for (size_t channels = 1; channels <= 40; channels += 7) { 873 for (size_t step = 2; step <= 9; step++) { 874 DWConvMicrokernelTester() 875 .cr(8) 876 .kr(9) 877 .channels(channels) 878 .width(3) 879 .step(step) 880 .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith); 881 } 882 } 883 } 884 TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH,multipixel_with_output_stride)885 TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH, multipixel_with_output_stride) { 886 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 887 for (size_t channels = 1; channels 
<= 40; channels += 7) { 888 DWConvMicrokernelTester() 889 .cr(8) 890 .kr(9) 891 .channels(8) 892 .width(5) 893 .output_stride(43) 894 .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith); 895 } 896 } 897 TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH,multipixel_with_qmin)898 TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH, multipixel_with_qmin) { 899 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 900 for (size_t channels = 1; channels <= 40; channels += 7) { 901 DWConvMicrokernelTester() 902 .cr(8) 903 .kr(9) 904 .channels(channels) 905 .width(3) 906 .qmin(128) 907 .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith); 908 } 909 } 910 TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH,multipixel_with_qmax)911 TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH, multipixel_with_qmax) { 912 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 913 for (size_t channels = 1; channels <= 40; channels += 7) { 914 DWConvMicrokernelTester() 915 .cr(8) 916 .kr(9) 917 .channels(channels) 918 .width(3) 919 .qmax(128) 920 .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith); 921 } 922 } 923 TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH,input_offset)924 TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH, input_offset) { 925 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 926 for (uint32_t channels = 16; channels < 128; channels += 24) { 927 DWConvMicrokernelTester() 928 .cr(8) 929 .kr(9) 930 .channels(channels) 931 .input_offset(176) 932 .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith); 933 } 934 } 935 TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH,zero)936 TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH, zero) { 937 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 938 for (uint32_t mz = 0; mz < 9; mz++) { 939 for (uint32_t channels = 16; channels < 128; channels += 24) { 940 DWConvMicrokernelTester() 941 .cr(8) 942 .kr(9) 943 .channels(channels) 944 .input_offset(176) 945 .zero_index(mz) 946 .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith); 947 } 948 } 949 } 950 #endif // XNN_ARCH_ARM64 951 952 953 #if XNN_ARCH_ARM64 
// Tests for xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith_acc2 (cr = 8, kr = 9).

// Channel count equal to cr.
TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH_ACC2, c_eq_8) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  DWConvMicrokernelTester()
    .cr(8).kr(9).channels(8)
    .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith_acc2);
}

// Channel counts that are multiples of cr: 16, 40, 64, 88, 112.
TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH_ACC2, c_div_8) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t channels = 16; channels < 128; channels += 24) {
    DWConvMicrokernelTester()
      .cr(8).kr(9).channels(channels)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith_acc2);
  }
}

// Same sweep as c_div_8 with qmin(128).
TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH_ACC2, c_div_8_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t channels = 16; channels < 128; channels += 24) {
    DWConvMicrokernelTester()
      .cr(8).kr(9).channels(channels)
      .qmin(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith_acc2);
  }
}

// Same sweep as c_div_8 with qmax(128).
TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH_ACC2, c_div_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t channels = 16; channels < 128; channels += 24) {
    DWConvMicrokernelTester()
      .cr(8).kr(9).channels(channels)
      .qmax(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith_acc2);
  }
}

// Channel counts below cr: 1..7.
TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH_ACC2, c_lt_8) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  for (uint32_t channels = 1; channels < 8; channels++) {
    DWConvMicrokernelTester()
      .cr(8).kr(9).channels(channels)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith_acc2);
  }
}
// up8x9 NEON FP16 ACC2 kernel: channel counts just above cr (8), multi-pixel
// (width > 1) cases, and the input_offset case.
TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH_ACC2, c_gt_8) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  // Every channel count from 9 through 15.
  for (uint32_t num_channels = 9; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(num_channels)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith_acc2);
  }
}

TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH_ACC2, c_gt_8_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  // Same sweep as c_gt_8, with qmin set to 128.
  for (uint32_t num_channels = 9; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(num_channels).qmin(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith_acc2);
  }
}

TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH_ACC2, c_gt_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  // Same sweep as c_gt_8, with qmax set to 128.
  for (uint32_t num_channels = 9; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(num_channels).qmax(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith_acc2);
  }
}

TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH_ACC2, multipixel) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  // width(3) with 1, 8, 15, 22, 29 and 36 channels.
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(num_channels).width(3)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith_acc2);
  }
}

TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH_ACC2, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  // Crosses the multipixel channel sweep with every step from 2 through kr (9).
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    for (size_t pixel_step = 2; pixel_step <= 9; ++pixel_step) {
      DWConvMicrokernelTester().cr(8).kr(9).channels(num_channels).width(3).step(pixel_step)
        .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith_acc2);
    }
  }
}

TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH_ACC2, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  // Fix: the loop variable is now forwarded to .channels(). The previous
  // hard-coded .channels(8) made all six iterations run the same configuration.
  // output_stride(43) is larger than every channel count visited (max 36).
  // NOTE(review): presumably the tester requires output_stride >= channels — confirm.
  // NOTE(review): this file is generated (see the file header); mirror this
  // change in tools/generate-dwconv-test.py or it is lost on regeneration.
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(num_channels).width(5).output_stride(43)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith_acc2);
  }
}

TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH_ACC2, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  // multipixel sweep with qmin set to 128.
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(num_channels).width(3).qmin(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith_acc2);
  }
}

TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH_ACC2, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  // multipixel sweep with qmax set to 128.
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(num_channels).width(3).qmax(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith_acc2);
  }
}

TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH_ACC2, input_offset) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  // c_div_8 channel sweep with input_offset set to 176.
  for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
    DWConvMicrokernelTester().cr(8).kr(9).channels(num_channels).input_offset(176)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith_acc2);
  }
}
TEST(F16_DWCONV_MINMAX_UP8X9__NEONFP16ARITH_ACC2, zero) { 1123 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1124 for (uint32_t mz = 0; mz < 9; mz++) { 1125 for (uint32_t channels = 16; channels < 128; channels += 24) { 1126 DWConvMicrokernelTester() 1127 .cr(8) 1128 .kr(9) 1129 .channels(channels) 1130 .input_offset(176) 1131 .zero_index(mz) 1132 .Test(xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith_acc2); 1133 } 1134 } 1135 } 1136 #endif // XNN_ARCH_ARM64 1137 1138 1139 #if XNN_ARCH_ARM64 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH,c_eq_16)1140 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH, c_eq_16) { 1141 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1142 DWConvMicrokernelTester() 1143 .cr(16) 1144 .kr(9) 1145 .channels(16) 1146 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith); 1147 } 1148 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH,c_div_16)1149 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH, c_div_16) { 1150 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1151 for (uint32_t channels = 32; channels < 256; channels += 48) { 1152 DWConvMicrokernelTester() 1153 .cr(16) 1154 .kr(9) 1155 .channels(channels) 1156 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith); 1157 } 1158 } 1159 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH,c_div_16_with_qmin)1160 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH, c_div_16_with_qmin) { 1161 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1162 for (uint32_t channels = 32; channels < 256; channels += 48) { 1163 DWConvMicrokernelTester() 1164 .cr(16) 1165 .kr(9) 1166 .channels(channels) 1167 .qmin(128) 1168 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith); 1169 } 1170 } 1171 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH,c_div_16_with_qmax)1172 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH, c_div_16_with_qmax) { 1173 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1174 for (uint32_t channels = 32; channels < 256; channels += 48) { 1175 DWConvMicrokernelTester() 1176 .cr(16) 1177 .kr(9) 1178 .channels(channels) 1179 .qmax(128) 1180 
.Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith); 1181 } 1182 } 1183 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH,c_lt_16)1184 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH, c_lt_16) { 1185 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1186 for (uint32_t channels = 1; channels < 16; channels++) { 1187 DWConvMicrokernelTester() 1188 .cr(16) 1189 .kr(9) 1190 .channels(channels) 1191 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith); 1192 } 1193 } 1194 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH,c_gt_16)1195 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH, c_gt_16) { 1196 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1197 for (uint32_t channels = 17; channels < 32; channels++) { 1198 DWConvMicrokernelTester() 1199 .cr(16) 1200 .kr(9) 1201 .channels(channels) 1202 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith); 1203 } 1204 } 1205 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH,c_gt_16_with_qmin)1206 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH, c_gt_16_with_qmin) { 1207 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1208 for (uint32_t channels = 17; channels < 32; channels++) { 1209 DWConvMicrokernelTester() 1210 .cr(16) 1211 .kr(9) 1212 .channels(channels) 1213 .qmin(128) 1214 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith); 1215 } 1216 } 1217 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH,c_gt_16_with_qmax)1218 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH, c_gt_16_with_qmax) { 1219 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1220 for (uint32_t channels = 17; channels < 32; channels++) { 1221 DWConvMicrokernelTester() 1222 .cr(16) 1223 .kr(9) 1224 .channels(channels) 1225 .qmax(128) 1226 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith); 1227 } 1228 } 1229 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH,multipixel)1230 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH, multipixel) { 1231 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1232 for (size_t channels = 1; channels <= 80; channels += 15) { 1233 DWConvMicrokernelTester() 1234 .cr(16) 1235 .kr(9) 1236 .channels(channels) 1237 
.width(3) 1238 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith); 1239 } 1240 } 1241 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH,multipixel_with_step)1242 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH, multipixel_with_step) { 1243 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1244 for (size_t channels = 1; channels <= 80; channels += 15) { 1245 for (size_t step = 2; step <= 9; step++) { 1246 DWConvMicrokernelTester() 1247 .cr(16) 1248 .kr(9) 1249 .channels(channels) 1250 .width(3) 1251 .step(step) 1252 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith); 1253 } 1254 } 1255 } 1256 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH,multipixel_with_output_stride)1257 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH, multipixel_with_output_stride) { 1258 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1259 for (size_t channels = 1; channels <= 80; channels += 15) { 1260 DWConvMicrokernelTester() 1261 .cr(16) 1262 .kr(9) 1263 .channels(16) 1264 .width(5) 1265 .output_stride(83) 1266 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith); 1267 } 1268 } 1269 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH,multipixel_with_qmin)1270 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH, multipixel_with_qmin) { 1271 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1272 for (size_t channels = 1; channels <= 80; channels += 15) { 1273 DWConvMicrokernelTester() 1274 .cr(16) 1275 .kr(9) 1276 .channels(channels) 1277 .width(3) 1278 .qmin(128) 1279 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith); 1280 } 1281 } 1282 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH,multipixel_with_qmax)1283 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH, multipixel_with_qmax) { 1284 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1285 for (size_t channels = 1; channels <= 80; channels += 15) { 1286 DWConvMicrokernelTester() 1287 .cr(16) 1288 .kr(9) 1289 .channels(channels) 1290 .width(3) 1291 .qmax(128) 1292 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith); 1293 } 1294 } 1295 
TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH,input_offset)1296 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH, input_offset) { 1297 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1298 for (uint32_t channels = 32; channels < 256; channels += 48) { 1299 DWConvMicrokernelTester() 1300 .cr(16) 1301 .kr(9) 1302 .channels(channels) 1303 .input_offset(304) 1304 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith); 1305 } 1306 } 1307 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH,zero)1308 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH, zero) { 1309 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1310 for (uint32_t mz = 0; mz < 9; mz++) { 1311 for (uint32_t channels = 32; channels < 256; channels += 48) { 1312 DWConvMicrokernelTester() 1313 .cr(16) 1314 .kr(9) 1315 .channels(channels) 1316 .input_offset(304) 1317 .zero_index(mz) 1318 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith); 1319 } 1320 } 1321 } 1322 #endif // XNN_ARCH_ARM64 1323 1324 1325 #if XNN_ARCH_ARM64 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH_ACC2,c_eq_16)1326 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH_ACC2, c_eq_16) { 1327 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1328 DWConvMicrokernelTester() 1329 .cr(16) 1330 .kr(9) 1331 .channels(16) 1332 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2); 1333 } 1334 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH_ACC2,c_div_16)1335 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH_ACC2, c_div_16) { 1336 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1337 for (uint32_t channels = 32; channels < 256; channels += 48) { 1338 DWConvMicrokernelTester() 1339 .cr(16) 1340 .kr(9) 1341 .channels(channels) 1342 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2); 1343 } 1344 } 1345 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH_ACC2,c_div_16_with_qmin)1346 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH_ACC2, c_div_16_with_qmin) { 1347 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1348 for (uint32_t channels = 32; channels < 256; channels += 48) { 1349 DWConvMicrokernelTester() 1350 .cr(16) 1351 .kr(9) 
1352 .channels(channels) 1353 .qmin(128) 1354 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2); 1355 } 1356 } 1357 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH_ACC2,c_div_16_with_qmax)1358 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH_ACC2, c_div_16_with_qmax) { 1359 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1360 for (uint32_t channels = 32; channels < 256; channels += 48) { 1361 DWConvMicrokernelTester() 1362 .cr(16) 1363 .kr(9) 1364 .channels(channels) 1365 .qmax(128) 1366 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2); 1367 } 1368 } 1369 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH_ACC2,c_lt_16)1370 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH_ACC2, c_lt_16) { 1371 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1372 for (uint32_t channels = 1; channels < 16; channels++) { 1373 DWConvMicrokernelTester() 1374 .cr(16) 1375 .kr(9) 1376 .channels(channels) 1377 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2); 1378 } 1379 } 1380 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH_ACC2,c_gt_16)1381 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH_ACC2, c_gt_16) { 1382 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1383 for (uint32_t channels = 17; channels < 32; channels++) { 1384 DWConvMicrokernelTester() 1385 .cr(16) 1386 .kr(9) 1387 .channels(channels) 1388 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2); 1389 } 1390 } 1391 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH_ACC2,c_gt_16_with_qmin)1392 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH_ACC2, c_gt_16_with_qmin) { 1393 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1394 for (uint32_t channels = 17; channels < 32; channels++) { 1395 DWConvMicrokernelTester() 1396 .cr(16) 1397 .kr(9) 1398 .channels(channels) 1399 .qmin(128) 1400 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2); 1401 } 1402 } 1403 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH_ACC2,c_gt_16_with_qmax)1404 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH_ACC2, c_gt_16_with_qmax) { 1405 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1406 for 
(uint32_t channels = 17; channels < 32; channels++) { 1407 DWConvMicrokernelTester() 1408 .cr(16) 1409 .kr(9) 1410 .channels(channels) 1411 .qmax(128) 1412 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2); 1413 } 1414 } 1415 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH_ACC2,multipixel)1416 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH_ACC2, multipixel) { 1417 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1418 for (size_t channels = 1; channels <= 80; channels += 15) { 1419 DWConvMicrokernelTester() 1420 .cr(16) 1421 .kr(9) 1422 .channels(channels) 1423 .width(3) 1424 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2); 1425 } 1426 } 1427 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH_ACC2,multipixel_with_step)1428 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH_ACC2, multipixel_with_step) { 1429 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1430 for (size_t channels = 1; channels <= 80; channels += 15) { 1431 for (size_t step = 2; step <= 9; step++) { 1432 DWConvMicrokernelTester() 1433 .cr(16) 1434 .kr(9) 1435 .channels(channels) 1436 .width(3) 1437 .step(step) 1438 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2); 1439 } 1440 } 1441 } 1442 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH_ACC2,multipixel_with_output_stride)1443 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH_ACC2, multipixel_with_output_stride) { 1444 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1445 for (size_t channels = 1; channels <= 80; channels += 15) { 1446 DWConvMicrokernelTester() 1447 .cr(16) 1448 .kr(9) 1449 .channels(16) 1450 .width(5) 1451 .output_stride(83) 1452 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2); 1453 } 1454 } 1455 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH_ACC2,multipixel_with_qmin)1456 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH_ACC2, multipixel_with_qmin) { 1457 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1458 for (size_t channels = 1; channels <= 80; channels += 15) { 1459 DWConvMicrokernelTester() 1460 .cr(16) 1461 .kr(9) 1462 .channels(channels) 1463 
.width(3) 1464 .qmin(128) 1465 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2); 1466 } 1467 } 1468 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH_ACC2,multipixel_with_qmax)1469 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH_ACC2, multipixel_with_qmax) { 1470 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1471 for (size_t channels = 1; channels <= 80; channels += 15) { 1472 DWConvMicrokernelTester() 1473 .cr(16) 1474 .kr(9) 1475 .channels(channels) 1476 .width(3) 1477 .qmax(128) 1478 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2); 1479 } 1480 } 1481 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH_ACC2,input_offset)1482 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH_ACC2, input_offset) { 1483 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1484 for (uint32_t channels = 32; channels < 256; channels += 48) { 1485 DWConvMicrokernelTester() 1486 .cr(16) 1487 .kr(9) 1488 .channels(channels) 1489 .input_offset(304) 1490 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2); 1491 } 1492 } 1493 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH_ACC2,zero)1494 TEST(F16_DWCONV_MINMAX_UP16X9__NEONFP16ARITH_ACC2, zero) { 1495 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1496 for (uint32_t mz = 0; mz < 9; mz++) { 1497 for (uint32_t channels = 32; channels < 256; channels += 48) { 1498 DWConvMicrokernelTester() 1499 .cr(16) 1500 .kr(9) 1501 .channels(channels) 1502 .input_offset(304) 1503 .zero_index(mz) 1504 .Test(xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2); 1505 } 1506 } 1507 } 1508 #endif // XNN_ARCH_ARM64 1509 1510 1511 #if XNN_ARCH_ARM64 TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH,c_eq_8)1512 TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH, c_eq_8) { 1513 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1514 DWConvMicrokernelTester() 1515 .cr(8) 1516 .kr(4) 1517 .channels(8) 1518 .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith); 1519 } 1520 TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH,c_div_8)1521 TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH, c_div_8) { 1522 
TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1523 for (uint32_t channels = 16; channels < 128; channels += 24) { 1524 DWConvMicrokernelTester() 1525 .cr(8) 1526 .kr(4) 1527 .channels(channels) 1528 .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith); 1529 } 1530 } 1531 TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH,c_div_8_with_qmin)1532 TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH, c_div_8_with_qmin) { 1533 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1534 for (uint32_t channels = 16; channels < 128; channels += 24) { 1535 DWConvMicrokernelTester() 1536 .cr(8) 1537 .kr(4) 1538 .channels(channels) 1539 .qmin(128) 1540 .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith); 1541 } 1542 } 1543 TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH,c_div_8_with_qmax)1544 TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH, c_div_8_with_qmax) { 1545 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1546 for (uint32_t channels = 16; channels < 128; channels += 24) { 1547 DWConvMicrokernelTester() 1548 .cr(8) 1549 .kr(4) 1550 .channels(channels) 1551 .qmax(128) 1552 .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith); 1553 } 1554 } 1555 TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH,c_lt_8)1556 TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH, c_lt_8) { 1557 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1558 for (uint32_t channels = 1; channels < 8; channels++) { 1559 DWConvMicrokernelTester() 1560 .cr(8) 1561 .kr(4) 1562 .channels(channels) 1563 .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith); 1564 } 1565 } 1566 TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH,c_gt_8)1567 TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH, c_gt_8) { 1568 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1569 for (uint32_t channels = 9; channels < 16; channels++) { 1570 DWConvMicrokernelTester() 1571 .cr(8) 1572 .kr(4) 1573 .channels(channels) 1574 .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith); 1575 } 1576 } 1577 TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH,c_gt_8_with_qmin)1578 TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH, c_gt_8_with_qmin) { 1579 
TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1580 for (uint32_t channels = 9; channels < 16; channels++) { 1581 DWConvMicrokernelTester() 1582 .cr(8) 1583 .kr(4) 1584 .channels(channels) 1585 .qmin(128) 1586 .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith); 1587 } 1588 } 1589 TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH,c_gt_8_with_qmax)1590 TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH, c_gt_8_with_qmax) { 1591 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1592 for (uint32_t channels = 9; channels < 16; channels++) { 1593 DWConvMicrokernelTester() 1594 .cr(8) 1595 .kr(4) 1596 .channels(channels) 1597 .qmax(128) 1598 .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith); 1599 } 1600 } 1601 TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH,multipixel)1602 TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH, multipixel) { 1603 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1604 for (size_t channels = 1; channels <= 40; channels += 7) { 1605 DWConvMicrokernelTester() 1606 .cr(8) 1607 .kr(4) 1608 .channels(channels) 1609 .width(3) 1610 .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith); 1611 } 1612 } 1613 TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH,multipixel_with_step)1614 TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH, multipixel_with_step) { 1615 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1616 for (size_t channels = 1; channels <= 40; channels += 7) { 1617 for (size_t step = 2; step <= 4; step++) { 1618 DWConvMicrokernelTester() 1619 .cr(8) 1620 .kr(4) 1621 .channels(channels) 1622 .width(3) 1623 .step(step) 1624 .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith); 1625 } 1626 } 1627 } 1628 TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH,multipixel_with_output_stride)1629 TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH, multipixel_with_output_stride) { 1630 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1631 for (size_t channels = 1; channels <= 40; channels += 7) { 1632 DWConvMicrokernelTester() 1633 .cr(8) 1634 .kr(4) 1635 .channels(8) 1636 .width(5) 1637 .output_stride(43) 1638 
.Test(xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith); 1639 } 1640 } 1641 TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH,multipixel_with_qmin)1642 TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH, multipixel_with_qmin) { 1643 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1644 for (size_t channels = 1; channels <= 40; channels += 7) { 1645 DWConvMicrokernelTester() 1646 .cr(8) 1647 .kr(4) 1648 .channels(channels) 1649 .width(3) 1650 .qmin(128) 1651 .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith); 1652 } 1653 } 1654 TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH,multipixel_with_qmax)1655 TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH, multipixel_with_qmax) { 1656 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1657 for (size_t channels = 1; channels <= 40; channels += 7) { 1658 DWConvMicrokernelTester() 1659 .cr(8) 1660 .kr(4) 1661 .channels(channels) 1662 .width(3) 1663 .qmax(128) 1664 .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith); 1665 } 1666 } 1667 TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH,input_offset)1668 TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH, input_offset) { 1669 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1670 for (uint32_t channels = 16; channels < 128; channels += 24) { 1671 DWConvMicrokernelTester() 1672 .cr(8) 1673 .kr(4) 1674 .channels(channels) 1675 .input_offset(176) 1676 .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith); 1677 } 1678 } 1679 TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH,zero)1680 TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH, zero) { 1681 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1682 for (uint32_t mz = 0; mz < 4; mz++) { 1683 for (uint32_t channels = 16; channels < 128; channels += 24) { 1684 DWConvMicrokernelTester() 1685 .cr(8) 1686 .kr(4) 1687 .channels(channels) 1688 .input_offset(176) 1689 .zero_index(mz) 1690 .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith); 1691 } 1692 } 1693 } 1694 #endif // XNN_ARCH_ARM64 1695 1696 1697 #if XNN_ARCH_ARM64 TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH_ACC2,c_eq_8)1698 
// up8x4 NEON FP16 ACC2 kernel, single-pixel cases: channel counts equal to cr (8),
// multiples of 8, and below 8.
TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH_ACC2, c_eq_8) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  // channels == cr: one configuration, no loop.
  DWConvMicrokernelTester().cr(8).kr(4).channels(8)
    .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith_acc2);
}

TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH_ACC2, c_div_8) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  // Visits 16, 40, 64, 88 and 112 channels (all multiples of 8).
  for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
    DWConvMicrokernelTester().cr(8).kr(4).channels(num_channels)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith_acc2);
  }
}

TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH_ACC2, c_div_8_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  // Same channel sweep as c_div_8, with qmin set to 128.
  for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
    DWConvMicrokernelTester().cr(8).kr(4).channels(num_channels).qmin(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith_acc2);
  }
}

TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH_ACC2, c_div_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  // Same channel sweep as c_div_8, with qmax set to 128.
  for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
    DWConvMicrokernelTester().cr(8).kr(4).channels(num_channels).qmax(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith_acc2);
  }
}

TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH_ACC2, c_lt_8) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  // Every channel count from 1 through 7.
  for (uint32_t num_channels = 1; num_channels < 8; ++num_channels) {
    DWConvMicrokernelTester().cr(8).kr(4).channels(num_channels)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith_acc2);
  }
}
// up8x4 NEON FP16 ACC2 kernel: channel counts just above cr (8), multi-pixel
// (width > 1) cases, and the input_offset case.
TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH_ACC2, c_gt_8) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  // Every channel count from 9 through 15.
  for (uint32_t num_channels = 9; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester().cr(8).kr(4).channels(num_channels)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith_acc2);
  }
}

TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH_ACC2, c_gt_8_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  // Same sweep as c_gt_8, with qmin set to 128.
  for (uint32_t num_channels = 9; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester().cr(8).kr(4).channels(num_channels).qmin(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith_acc2);
  }
}

TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH_ACC2, c_gt_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  // Same sweep as c_gt_8, with qmax set to 128.
  for (uint32_t num_channels = 9; num_channels < 16; ++num_channels) {
    DWConvMicrokernelTester().cr(8).kr(4).channels(num_channels).qmax(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith_acc2);
  }
}

TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH_ACC2, multipixel) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  // width(3) with 1, 8, 15, 22, 29 and 36 channels.
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    DWConvMicrokernelTester().cr(8).kr(4).channels(num_channels).width(3)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith_acc2);
  }
}

TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH_ACC2, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  // Crosses the multipixel channel sweep with every step from 2 through kr (4).
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    for (size_t pixel_step = 2; pixel_step <= 4; ++pixel_step) {
      DWConvMicrokernelTester().cr(8).kr(4).channels(num_channels).width(3).step(pixel_step)
        .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith_acc2);
    }
  }
}

TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH_ACC2, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  // Fix: the loop variable is now forwarded to .channels(). The previous
  // hard-coded .channels(8) made all six iterations run the same configuration.
  // output_stride(43) is larger than every channel count visited (max 36).
  // NOTE(review): presumably the tester requires output_stride >= channels — confirm.
  // NOTE(review): this file is generated (see the file header); mirror this
  // change in tools/generate-dwconv-test.py or it is lost on regeneration.
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    DWConvMicrokernelTester().cr(8).kr(4).channels(num_channels).width(5).output_stride(43)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith_acc2);
  }
}

TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH_ACC2, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  // multipixel sweep with qmin set to 128.
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    DWConvMicrokernelTester().cr(8).kr(4).channels(num_channels).width(3).qmin(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith_acc2);
  }
}

TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH_ACC2, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  // multipixel sweep with qmax set to 128.
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    DWConvMicrokernelTester().cr(8).kr(4).channels(num_channels).width(3).qmax(128)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith_acc2);
  }
}

TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH_ACC2, input_offset) {
  TEST_REQUIRES_ARM_NEON_FP16_ARITH;
  // c_div_8 channel sweep with input_offset set to 176.
  for (uint32_t num_channels = 16; num_channels < 128; num_channels += 24) {
    DWConvMicrokernelTester().cr(8).kr(4).channels(num_channels).input_offset(176)
      .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith_acc2);
  }
}
TEST(F16_DWCONV_MINMAX_UP8X4__NEONFP16ARITH_ACC2, zero) { 1867 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1868 for (uint32_t mz = 0; mz < 4; mz++) { 1869 for (uint32_t channels = 16; channels < 128; channels += 24) { 1870 DWConvMicrokernelTester() 1871 .cr(8) 1872 .kr(4) 1873 .channels(channels) 1874 .input_offset(176) 1875 .zero_index(mz) 1876 .Test(xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith_acc2); 1877 } 1878 } 1879 } 1880 #endif // XNN_ARCH_ARM64 1881 1882 1883 #if XNN_ARCH_ARM64 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH,c_eq_16)1884 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH, c_eq_16) { 1885 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1886 DWConvMicrokernelTester() 1887 .cr(16) 1888 .kr(4) 1889 .channels(16) 1890 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith); 1891 } 1892 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH,c_div_16)1893 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH, c_div_16) { 1894 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1895 for (uint32_t channels = 32; channels < 256; channels += 48) { 1896 DWConvMicrokernelTester() 1897 .cr(16) 1898 .kr(4) 1899 .channels(channels) 1900 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith); 1901 } 1902 } 1903 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH,c_div_16_with_qmin)1904 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH, c_div_16_with_qmin) { 1905 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1906 for (uint32_t channels = 32; channels < 256; channels += 48) { 1907 DWConvMicrokernelTester() 1908 .cr(16) 1909 .kr(4) 1910 .channels(channels) 1911 .qmin(128) 1912 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith); 1913 } 1914 } 1915 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH,c_div_16_with_qmax)1916 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH, c_div_16_with_qmax) { 1917 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1918 for (uint32_t channels = 32; channels < 256; channels += 48) { 1919 DWConvMicrokernelTester() 1920 .cr(16) 1921 .kr(4) 1922 .channels(channels) 1923 .qmax(128) 1924 
.Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith); 1925 } 1926 } 1927 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH,c_lt_16)1928 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH, c_lt_16) { 1929 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1930 for (uint32_t channels = 1; channels < 16; channels++) { 1931 DWConvMicrokernelTester() 1932 .cr(16) 1933 .kr(4) 1934 .channels(channels) 1935 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith); 1936 } 1937 } 1938 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH,c_gt_16)1939 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH, c_gt_16) { 1940 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1941 for (uint32_t channels = 17; channels < 32; channels++) { 1942 DWConvMicrokernelTester() 1943 .cr(16) 1944 .kr(4) 1945 .channels(channels) 1946 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith); 1947 } 1948 } 1949 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH,c_gt_16_with_qmin)1950 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH, c_gt_16_with_qmin) { 1951 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1952 for (uint32_t channels = 17; channels < 32; channels++) { 1953 DWConvMicrokernelTester() 1954 .cr(16) 1955 .kr(4) 1956 .channels(channels) 1957 .qmin(128) 1958 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith); 1959 } 1960 } 1961 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH,c_gt_16_with_qmax)1962 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH, c_gt_16_with_qmax) { 1963 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1964 for (uint32_t channels = 17; channels < 32; channels++) { 1965 DWConvMicrokernelTester() 1966 .cr(16) 1967 .kr(4) 1968 .channels(channels) 1969 .qmax(128) 1970 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith); 1971 } 1972 } 1973 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH,multipixel)1974 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH, multipixel) { 1975 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1976 for (size_t channels = 1; channels <= 80; channels += 15) { 1977 DWConvMicrokernelTester() 1978 .cr(16) 1979 .kr(4) 1980 .channels(channels) 1981 
.width(3) 1982 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith); 1983 } 1984 } 1985 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH,multipixel_with_step)1986 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH, multipixel_with_step) { 1987 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 1988 for (size_t channels = 1; channels <= 80; channels += 15) { 1989 for (size_t step = 2; step <= 4; step++) { 1990 DWConvMicrokernelTester() 1991 .cr(16) 1992 .kr(4) 1993 .channels(channels) 1994 .width(3) 1995 .step(step) 1996 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith); 1997 } 1998 } 1999 } 2000 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH,multipixel_with_output_stride)2001 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH, multipixel_with_output_stride) { 2002 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2003 for (size_t channels = 1; channels <= 80; channels += 15) { 2004 DWConvMicrokernelTester() 2005 .cr(16) 2006 .kr(4) 2007 .channels(16) 2008 .width(5) 2009 .output_stride(83) 2010 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith); 2011 } 2012 } 2013 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH,multipixel_with_qmin)2014 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH, multipixel_with_qmin) { 2015 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2016 for (size_t channels = 1; channels <= 80; channels += 15) { 2017 DWConvMicrokernelTester() 2018 .cr(16) 2019 .kr(4) 2020 .channels(channels) 2021 .width(3) 2022 .qmin(128) 2023 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith); 2024 } 2025 } 2026 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH,multipixel_with_qmax)2027 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH, multipixel_with_qmax) { 2028 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2029 for (size_t channels = 1; channels <= 80; channels += 15) { 2030 DWConvMicrokernelTester() 2031 .cr(16) 2032 .kr(4) 2033 .channels(channels) 2034 .width(3) 2035 .qmax(128) 2036 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith); 2037 } 2038 } 2039 
TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH,input_offset)2040 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH, input_offset) { 2041 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2042 for (uint32_t channels = 32; channels < 256; channels += 48) { 2043 DWConvMicrokernelTester() 2044 .cr(16) 2045 .kr(4) 2046 .channels(channels) 2047 .input_offset(304) 2048 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith); 2049 } 2050 } 2051 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH,zero)2052 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH, zero) { 2053 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2054 for (uint32_t mz = 0; mz < 4; mz++) { 2055 for (uint32_t channels = 32; channels < 256; channels += 48) { 2056 DWConvMicrokernelTester() 2057 .cr(16) 2058 .kr(4) 2059 .channels(channels) 2060 .input_offset(304) 2061 .zero_index(mz) 2062 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith); 2063 } 2064 } 2065 } 2066 #endif // XNN_ARCH_ARM64 2067 2068 2069 #if XNN_ARCH_ARM64 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH_ACC2,c_eq_16)2070 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH_ACC2, c_eq_16) { 2071 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2072 DWConvMicrokernelTester() 2073 .cr(16) 2074 .kr(4) 2075 .channels(16) 2076 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith_acc2); 2077 } 2078 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH_ACC2,c_div_16)2079 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH_ACC2, c_div_16) { 2080 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2081 for (uint32_t channels = 32; channels < 256; channels += 48) { 2082 DWConvMicrokernelTester() 2083 .cr(16) 2084 .kr(4) 2085 .channels(channels) 2086 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith_acc2); 2087 } 2088 } 2089 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH_ACC2,c_div_16_with_qmin)2090 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH_ACC2, c_div_16_with_qmin) { 2091 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2092 for (uint32_t channels = 32; channels < 256; channels += 48) { 2093 DWConvMicrokernelTester() 2094 .cr(16) 2095 .kr(4) 
2096 .channels(channels) 2097 .qmin(128) 2098 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith_acc2); 2099 } 2100 } 2101 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH_ACC2,c_div_16_with_qmax)2102 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH_ACC2, c_div_16_with_qmax) { 2103 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2104 for (uint32_t channels = 32; channels < 256; channels += 48) { 2105 DWConvMicrokernelTester() 2106 .cr(16) 2107 .kr(4) 2108 .channels(channels) 2109 .qmax(128) 2110 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith_acc2); 2111 } 2112 } 2113 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH_ACC2,c_lt_16)2114 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH_ACC2, c_lt_16) { 2115 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2116 for (uint32_t channels = 1; channels < 16; channels++) { 2117 DWConvMicrokernelTester() 2118 .cr(16) 2119 .kr(4) 2120 .channels(channels) 2121 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith_acc2); 2122 } 2123 } 2124 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH_ACC2,c_gt_16)2125 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH_ACC2, c_gt_16) { 2126 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2127 for (uint32_t channels = 17; channels < 32; channels++) { 2128 DWConvMicrokernelTester() 2129 .cr(16) 2130 .kr(4) 2131 .channels(channels) 2132 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith_acc2); 2133 } 2134 } 2135 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH_ACC2,c_gt_16_with_qmin)2136 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH_ACC2, c_gt_16_with_qmin) { 2137 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2138 for (uint32_t channels = 17; channels < 32; channels++) { 2139 DWConvMicrokernelTester() 2140 .cr(16) 2141 .kr(4) 2142 .channels(channels) 2143 .qmin(128) 2144 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith_acc2); 2145 } 2146 } 2147 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH_ACC2,c_gt_16_with_qmax)2148 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH_ACC2, c_gt_16_with_qmax) { 2149 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2150 for 
(uint32_t channels = 17; channels < 32; channels++) { 2151 DWConvMicrokernelTester() 2152 .cr(16) 2153 .kr(4) 2154 .channels(channels) 2155 .qmax(128) 2156 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith_acc2); 2157 } 2158 } 2159 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH_ACC2,multipixel)2160 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH_ACC2, multipixel) { 2161 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2162 for (size_t channels = 1; channels <= 80; channels += 15) { 2163 DWConvMicrokernelTester() 2164 .cr(16) 2165 .kr(4) 2166 .channels(channels) 2167 .width(3) 2168 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith_acc2); 2169 } 2170 } 2171 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH_ACC2,multipixel_with_step)2172 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH_ACC2, multipixel_with_step) { 2173 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2174 for (size_t channels = 1; channels <= 80; channels += 15) { 2175 for (size_t step = 2; step <= 4; step++) { 2176 DWConvMicrokernelTester() 2177 .cr(16) 2178 .kr(4) 2179 .channels(channels) 2180 .width(3) 2181 .step(step) 2182 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith_acc2); 2183 } 2184 } 2185 } 2186 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH_ACC2,multipixel_with_output_stride)2187 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH_ACC2, multipixel_with_output_stride) { 2188 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2189 for (size_t channels = 1; channels <= 80; channels += 15) { 2190 DWConvMicrokernelTester() 2191 .cr(16) 2192 .kr(4) 2193 .channels(16) 2194 .width(5) 2195 .output_stride(83) 2196 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith_acc2); 2197 } 2198 } 2199 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH_ACC2,multipixel_with_qmin)2200 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH_ACC2, multipixel_with_qmin) { 2201 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2202 for (size_t channels = 1; channels <= 80; channels += 15) { 2203 DWConvMicrokernelTester() 2204 .cr(16) 2205 .kr(4) 2206 .channels(channels) 2207 
.width(3) 2208 .qmin(128) 2209 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith_acc2); 2210 } 2211 } 2212 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH_ACC2,multipixel_with_qmax)2213 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH_ACC2, multipixel_with_qmax) { 2214 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2215 for (size_t channels = 1; channels <= 80; channels += 15) { 2216 DWConvMicrokernelTester() 2217 .cr(16) 2218 .kr(4) 2219 .channels(channels) 2220 .width(3) 2221 .qmax(128) 2222 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith_acc2); 2223 } 2224 } 2225 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH_ACC2,input_offset)2226 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH_ACC2, input_offset) { 2227 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2228 for (uint32_t channels = 32; channels < 256; channels += 48) { 2229 DWConvMicrokernelTester() 2230 .cr(16) 2231 .kr(4) 2232 .channels(channels) 2233 .input_offset(304) 2234 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith_acc2); 2235 } 2236 } 2237 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH_ACC2,zero)2238 TEST(F16_DWCONV_MINMAX_UP16X4__NEONFP16ARITH_ACC2, zero) { 2239 TEST_REQUIRES_ARM_NEON_FP16_ARITH; 2240 for (uint32_t mz = 0; mz < 4; mz++) { 2241 for (uint32_t channels = 32; channels < 256; channels += 48) { 2242 DWConvMicrokernelTester() 2243 .cr(16) 2244 .kr(4) 2245 .channels(channels) 2246 .input_offset(304) 2247 .zero_index(mz) 2248 .Test(xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith_acc2); 2249 } 2250 } 2251 } 2252 #endif // XNN_ARCH_ARM64 2253