1 // Copyright (c) Facebook, Inc. and its affiliates. 2 // All rights reserved. 3 // 4 // Copyright 2019 Google LLC 5 // 6 // This source code is licensed under the BSD-style license found in the 7 // LICENSE file in the root directory of this source tree. 8 // 9 // Auto-generated file. Do not edit! 10 // Specification: test/qs8-dwconv-minmax-rndnu.yaml 11 // Generator: tools/generate-dwconv-test.py 12 13 14 #include <gtest/gtest.h> 15 16 #include <xnnpack/common.h> 17 #include <xnnpack/isa-checks.h> 18 19 #include <xnnpack/dwconv.h> 20 #include "dwconv-microkernel-tester.h" 21 22 23 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MLA8_LD64,c_eq_8)24 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MLA8_LD64, c_eq_8) { 25 TEST_REQUIRES_ARM_NEON; 26 DWConvMicrokernelTester() 27 .cr(8) 28 .kr(9) 29 .channels(8) 30 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mla8_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 31 } 32 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MLA8_LD64,c_div_8)33 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MLA8_LD64, c_div_8) { 34 TEST_REQUIRES_ARM_NEON; 35 for (uint32_t channels = 16; channels < 128; channels += 24) { 36 DWConvMicrokernelTester() 37 .cr(8) 38 .kr(9) 39 .channels(channels) 40 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mla8_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 41 } 42 } 43 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MLA8_LD64,c_div_8_with_qmin)44 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MLA8_LD64, c_div_8_with_qmin) { 45 TEST_REQUIRES_ARM_NEON; 46 for (uint32_t channels = 16; channels < 128; channels += 24) { 47 DWConvMicrokernelTester() 48 .cr(8) 49 .kr(9) 50 .channels(channels) 51 .qmin(128) 52 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mla8_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 53 } 54 } 55 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MLA8_LD64,c_div_8_with_qmax)56 
TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MLA8_LD64, c_div_8_with_qmax) { 57 TEST_REQUIRES_ARM_NEON; 58 for (uint32_t channels = 16; channels < 128; channels += 24) { 59 DWConvMicrokernelTester() 60 .cr(8) 61 .kr(9) 62 .channels(channels) 63 .qmax(128) 64 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mla8_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 65 } 66 } 67 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MLA8_LD64,c_lt_8)68 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MLA8_LD64, c_lt_8) { 69 TEST_REQUIRES_ARM_NEON; 70 for (uint32_t channels = 1; channels < 8; channels++) { 71 DWConvMicrokernelTester() 72 .cr(8) 73 .kr(9) 74 .channels(channels) 75 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mla8_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 76 } 77 } 78 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MLA8_LD64,c_gt_8)79 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MLA8_LD64, c_gt_8) { 80 TEST_REQUIRES_ARM_NEON; 81 for (uint32_t channels = 9; channels < 16; channels++) { 82 DWConvMicrokernelTester() 83 .cr(8) 84 .kr(9) 85 .channels(channels) 86 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mla8_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 87 } 88 } 89 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MLA8_LD64,c_gt_8_with_qmin)90 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MLA8_LD64, c_gt_8_with_qmin) { 91 TEST_REQUIRES_ARM_NEON; 92 for (uint32_t channels = 9; channels < 16; channels++) { 93 DWConvMicrokernelTester() 94 .cr(8) 95 .kr(9) 96 .channels(channels) 97 .qmin(128) 98 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mla8_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 99 } 100 } 101 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MLA8_LD64,c_gt_8_with_qmax)102 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MLA8_LD64, c_gt_8_with_qmax) { 103 TEST_REQUIRES_ARM_NEON; 104 for (uint32_t channels = 9; channels < 16; channels++) { 105 
DWConvMicrokernelTester() 106 .cr(8) 107 .kr(9) 108 .channels(channels) 109 .qmax(128) 110 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mla8_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 111 } 112 } 113 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MLA8_LD64,multipixel)114 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MLA8_LD64, multipixel) { 115 TEST_REQUIRES_ARM_NEON; 116 for (size_t channels = 1; channels <= 40; channels += 7) { 117 DWConvMicrokernelTester() 118 .cr(8) 119 .kr(9) 120 .channels(channels) 121 .width(3) 122 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mla8_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 123 } 124 } 125 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MLA8_LD64,multipixel_with_step)126 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MLA8_LD64, multipixel_with_step) { 127 TEST_REQUIRES_ARM_NEON; 128 for (size_t channels = 1; channels <= 40; channels += 7) { 129 for (size_t step = 2; step <= 9; step++) { 130 DWConvMicrokernelTester() 131 .cr(8) 132 .kr(9) 133 .channels(channels) 134 .width(3) 135 .step(step) 136 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mla8_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 137 } 138 } 139 } 140 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MLA8_LD64,multipixel_with_output_stride)141 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MLA8_LD64, multipixel_with_output_stride) { 142 TEST_REQUIRES_ARM_NEON; 143 for (size_t channels = 1; channels <= 40; channels += 7) { 144 DWConvMicrokernelTester() 145 .cr(8) 146 .kr(9) 147 .channels(8) 148 .width(5) 149 .output_stride(43) 150 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mla8_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 151 } 152 } 153 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MLA8_LD64,multipixel_with_qmin)154 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MLA8_LD64, multipixel_with_qmin) { 155 TEST_REQUIRES_ARM_NEON; 156 for (size_t channels = 
1; channels <= 40; channels += 7) { 157 DWConvMicrokernelTester() 158 .cr(8) 159 .kr(9) 160 .channels(channels) 161 .width(3) 162 .qmin(128) 163 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mla8_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 164 } 165 } 166 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MLA8_LD64,multipixel_with_qmax)167 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MLA8_LD64, multipixel_with_qmax) { 168 TEST_REQUIRES_ARM_NEON; 169 for (size_t channels = 1; channels <= 40; channels += 7) { 170 DWConvMicrokernelTester() 171 .cr(8) 172 .kr(9) 173 .channels(channels) 174 .width(3) 175 .qmax(128) 176 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mla8_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 177 } 178 } 179 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MLA8_LD64,input_offset)180 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MLA8_LD64, input_offset) { 181 TEST_REQUIRES_ARM_NEON; 182 for (uint32_t channels = 16; channels < 128; channels += 24) { 183 DWConvMicrokernelTester() 184 .cr(8) 185 .kr(9) 186 .channels(channels) 187 .input_offset(176) 188 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mla8_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 189 } 190 } 191 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MLA8_LD64,zero)192 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MLA8_LD64, zero) { 193 TEST_REQUIRES_ARM_NEON; 194 for (uint32_t mz = 0; mz < 9; mz++) { 195 for (uint32_t channels = 16; channels < 128; channels += 24) { 196 DWConvMicrokernelTester() 197 .cr(8) 198 .kr(9) 199 .channels(channels) 200 .input_offset(176) 201 .zero_index(mz) 202 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mla8_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 203 } 204 } 205 } 206 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 207 208 209 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL8_LD64,c_eq_8)210 
TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL8_LD64, c_eq_8) { 211 TEST_REQUIRES_ARM_NEON; 212 DWConvMicrokernelTester() 213 .cr(8) 214 .kr(9) 215 .channels(8) 216 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul8_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 217 } 218 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL8_LD64,c_div_8)219 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL8_LD64, c_div_8) { 220 TEST_REQUIRES_ARM_NEON; 221 for (uint32_t channels = 16; channels < 128; channels += 24) { 222 DWConvMicrokernelTester() 223 .cr(8) 224 .kr(9) 225 .channels(channels) 226 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul8_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 227 } 228 } 229 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL8_LD64,c_div_8_with_qmin)230 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL8_LD64, c_div_8_with_qmin) { 231 TEST_REQUIRES_ARM_NEON; 232 for (uint32_t channels = 16; channels < 128; channels += 24) { 233 DWConvMicrokernelTester() 234 .cr(8) 235 .kr(9) 236 .channels(channels) 237 .qmin(128) 238 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul8_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 239 } 240 } 241 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL8_LD64,c_div_8_with_qmax)242 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL8_LD64, c_div_8_with_qmax) { 243 TEST_REQUIRES_ARM_NEON; 244 for (uint32_t channels = 16; channels < 128; channels += 24) { 245 DWConvMicrokernelTester() 246 .cr(8) 247 .kr(9) 248 .channels(channels) 249 .qmax(128) 250 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul8_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 251 } 252 } 253 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL8_LD64,c_lt_8)254 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL8_LD64, c_lt_8) { 255 TEST_REQUIRES_ARM_NEON; 256 for (uint32_t channels = 1; channels < 8; channels++) { 257 DWConvMicrokernelTester() 258 .cr(8) 259 
.kr(9) 260 .channels(channels) 261 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul8_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 262 } 263 } 264 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL8_LD64,c_gt_8)265 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL8_LD64, c_gt_8) { 266 TEST_REQUIRES_ARM_NEON; 267 for (uint32_t channels = 9; channels < 16; channels++) { 268 DWConvMicrokernelTester() 269 .cr(8) 270 .kr(9) 271 .channels(channels) 272 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul8_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 273 } 274 } 275 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL8_LD64,c_gt_8_with_qmin)276 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL8_LD64, c_gt_8_with_qmin) { 277 TEST_REQUIRES_ARM_NEON; 278 for (uint32_t channels = 9; channels < 16; channels++) { 279 DWConvMicrokernelTester() 280 .cr(8) 281 .kr(9) 282 .channels(channels) 283 .qmin(128) 284 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul8_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 285 } 286 } 287 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL8_LD64,c_gt_8_with_qmax)288 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL8_LD64, c_gt_8_with_qmax) { 289 TEST_REQUIRES_ARM_NEON; 290 for (uint32_t channels = 9; channels < 16; channels++) { 291 DWConvMicrokernelTester() 292 .cr(8) 293 .kr(9) 294 .channels(channels) 295 .qmax(128) 296 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul8_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 297 } 298 } 299 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL8_LD64,multipixel)300 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL8_LD64, multipixel) { 301 TEST_REQUIRES_ARM_NEON; 302 for (size_t channels = 1; channels <= 40; channels += 7) { 303 DWConvMicrokernelTester() 304 .cr(8) 305 .kr(9) 306 .channels(channels) 307 .width(3) 308 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul8_ld64, 
xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 309 } 310 } 311 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL8_LD64,multipixel_with_step)312 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL8_LD64, multipixel_with_step) { 313 TEST_REQUIRES_ARM_NEON; 314 for (size_t channels = 1; channels <= 40; channels += 7) { 315 for (size_t step = 2; step <= 9; step++) { 316 DWConvMicrokernelTester() 317 .cr(8) 318 .kr(9) 319 .channels(channels) 320 .width(3) 321 .step(step) 322 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul8_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 323 } 324 } 325 } 326 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL8_LD64,multipixel_with_output_stride)327 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL8_LD64, multipixel_with_output_stride) { 328 TEST_REQUIRES_ARM_NEON; 329 for (size_t channels = 1; channels <= 40; channels += 7) { 330 DWConvMicrokernelTester() 331 .cr(8) 332 .kr(9) 333 .channels(8) 334 .width(5) 335 .output_stride(43) 336 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul8_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 337 } 338 } 339 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL8_LD64,multipixel_with_qmin)340 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL8_LD64, multipixel_with_qmin) { 341 TEST_REQUIRES_ARM_NEON; 342 for (size_t channels = 1; channels <= 40; channels += 7) { 343 DWConvMicrokernelTester() 344 .cr(8) 345 .kr(9) 346 .channels(channels) 347 .width(3) 348 .qmin(128) 349 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul8_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 350 } 351 } 352 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL8_LD64,multipixel_with_qmax)353 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL8_LD64, multipixel_with_qmax) { 354 TEST_REQUIRES_ARM_NEON; 355 for (size_t channels = 1; channels <= 40; channels += 7) { 356 DWConvMicrokernelTester() 357 .cr(8) 358 .kr(9) 359 .channels(channels) 360 
.width(3) 361 .qmax(128) 362 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul8_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 363 } 364 } 365 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL8_LD64,input_offset)366 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL8_LD64, input_offset) { 367 TEST_REQUIRES_ARM_NEON; 368 for (uint32_t channels = 16; channels < 128; channels += 24) { 369 DWConvMicrokernelTester() 370 .cr(8) 371 .kr(9) 372 .channels(channels) 373 .input_offset(176) 374 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul8_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 375 } 376 } 377 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL8_LD64,zero)378 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL8_LD64, zero) { 379 TEST_REQUIRES_ARM_NEON; 380 for (uint32_t mz = 0; mz < 9; mz++) { 381 for (uint32_t channels = 16; channels < 128; channels += 24) { 382 DWConvMicrokernelTester() 383 .cr(8) 384 .kr(9) 385 .channels(channels) 386 .input_offset(176) 387 .zero_index(mz) 388 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul8_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 389 } 390 } 391 } 392 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 393 394 395 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL16,c_eq_8)396 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL16, c_eq_8) { 397 TEST_REQUIRES_ARM_NEON; 398 DWConvMicrokernelTester() 399 .cr(8) 400 .kr(9) 401 .channels(8) 402 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 403 } 404 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL16,c_div_8)405 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL16, c_div_8) { 406 TEST_REQUIRES_ARM_NEON; 407 for (uint32_t channels = 16; channels < 128; channels += 24) { 408 DWConvMicrokernelTester() 409 .cr(8) 410 .kr(9) 411 .channels(channels) 412 
.Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 413 } 414 } 415 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL16,c_div_8_with_qmin)416 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL16, c_div_8_with_qmin) { 417 TEST_REQUIRES_ARM_NEON; 418 for (uint32_t channels = 16; channels < 128; channels += 24) { 419 DWConvMicrokernelTester() 420 .cr(8) 421 .kr(9) 422 .channels(channels) 423 .qmin(128) 424 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 425 } 426 } 427 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL16,c_div_8_with_qmax)428 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL16, c_div_8_with_qmax) { 429 TEST_REQUIRES_ARM_NEON; 430 for (uint32_t channels = 16; channels < 128; channels += 24) { 431 DWConvMicrokernelTester() 432 .cr(8) 433 .kr(9) 434 .channels(channels) 435 .qmax(128) 436 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 437 } 438 } 439 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL16,c_lt_8)440 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL16, c_lt_8) { 441 TEST_REQUIRES_ARM_NEON; 442 for (uint32_t channels = 1; channels < 8; channels++) { 443 DWConvMicrokernelTester() 444 .cr(8) 445 .kr(9) 446 .channels(channels) 447 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 448 } 449 } 450 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL16,c_gt_8)451 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL16, c_gt_8) { 452 TEST_REQUIRES_ARM_NEON; 453 for (uint32_t channels = 9; channels < 16; channels++) { 454 DWConvMicrokernelTester() 455 .cr(8) 456 .kr(9) 457 .channels(channels) 458 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 459 } 460 } 461 
// Sweeps channel counts 9..15 with qmin set to 128.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL16, c_gt_8_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t num_channels = 9; num_channels < 16; num_channels++) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(num_channels)
      .qmin(128)
      .Test(
        xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}

// Sweeps channel counts 9..15 with qmax set to 128.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL16, c_gt_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t num_channels = 9; num_channels < 16; num_channels++) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(num_channels)
      .qmax(128)
      .Test(
        xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}

// Width of 3 pixels across channel counts 1, 8, 15, 22, 29, 36.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL16, multipixel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(num_channels)
      .width(3)
      .Test(
        xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16,
        xnn_init_qs8_conv_minmax_rndnu_neon_params,
        xnn_qs8_requantize_rndnu);
  }
}

// Same channel sweep as multipixel; the inner loop varies step over 2..9.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL16, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t num_channels = 1; num_channels <= 40; num_channels += 7) {
    for (size_t pixel_step = 2; pixel_step <= 9; pixel_step++) {
      DWConvMicrokernelTester()
        .cr(8)
        .kr(9)
        .channels(num_channels)
        .width(3)
        .step(pixel_step)
        .Test(
          xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16,
          xnn_init_qs8_conv_minmax_rndnu_neon_params,
          xnn_qs8_requantize_rndnu);
    }
  }
}

// Width of 5 pixels with output_stride(43), swept over channel counts
// 1, 8, 15, 22, 29, 36. The loop variable is passed to .channels() so that
// each iteration runs a different configuration; a hard-coded .channels(8)
// would repeat the identical case six times.
// NOTE(review): this file is generated (tools/generate-dwconv-test.py); the
// generator template presumably needs the same change - confirm.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL16, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// Width of 3 pixels across channel counts 1, 8, 15, 22, 29, 36, with qmin
// set to 128.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL16, multipixel_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(channels)
      .width(3)
      .qmin(128)
      .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// Width of 3 pixels across channel counts 1, 8, 15, 22, 29, 36, with qmax
// set to 128.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL16, multipixel_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(channels)
      .width(3)
      .qmax(128)
      .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// Channel counts 16, 40, 64, 88, 112 with input_offset set to 176.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL16, input_offset) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t channels = 16; channels < 128; channels += 24) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(9)
      .channels(channels)
      .input_offset(176)
      .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL16,zero)564 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X9__NEON_MUL16, zero) { 565 TEST_REQUIRES_ARM_NEON; 566 for (uint32_t mz = 0; mz < 9; mz++) { 567 for (uint32_t channels = 16; channels < 128; channels += 24) { 568 DWConvMicrokernelTester() 569 .cr(8) 570 .kr(9) 571 .channels(channels) 572 .input_offset(176) 573 .zero_index(mz) 574 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x9__neon_mul16, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 575 } 576 } 577 } 578 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 579 580 581 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MLA8_LD64,c_eq_8)582 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MLA8_LD64, c_eq_8) { 583 TEST_REQUIRES_ARM_NEON; 584 DWConvMicrokernelTester() 585 .cr(8) 586 .kr(25) 587 .channels(8) 588 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mla8_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 589 } 590 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MLA8_LD64,c_div_8)591 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MLA8_LD64, c_div_8) { 592 TEST_REQUIRES_ARM_NEON; 593 for (uint32_t channels = 16; channels < 128; channels += 24) { 594 DWConvMicrokernelTester() 595 .cr(8) 596 .kr(25) 597 .channels(channels) 598 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mla8_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 599 } 600 } 601 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MLA8_LD64,c_div_8_with_qmin)602 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MLA8_LD64, c_div_8_with_qmin) { 603 TEST_REQUIRES_ARM_NEON; 604 for (uint32_t channels = 16; channels < 128; channels += 24) { 605 DWConvMicrokernelTester() 606 .cr(8) 607 .kr(25) 608 .channels(channels) 609 .qmin(128) 610 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mla8_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 611 } 612 } 613 
// Channel counts 16, 40, 64, 88, 112 (multiples of 8) with qmax set to 128.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MLA8_LD64, c_div_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t channels = 16; channels < 128; channels += 24) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(25)
      .channels(channels)
      .qmax(128)
      .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mla8_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// Channel counts 1..7 (fewer than 8).
TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MLA8_LD64, c_lt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t channels = 1; channels < 8; channels++) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(25)
      .channels(channels)
      .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mla8_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// Channel counts 9..15 (more than 8, fewer than 16).
TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MLA8_LD64, c_gt_8) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t channels = 9; channels < 16; channels++) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(25)
      .channels(channels)
      .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mla8_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// Channel counts 9..15 with qmin set to 128.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MLA8_LD64, c_gt_8_with_qmin) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t channels = 9; channels < 16; channels++) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(25)
      .channels(channels)
      .qmin(128)
      .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mla8_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// Channel counts 9..15 with qmax set to 128.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MLA8_LD64, c_gt_8_with_qmax) {
  TEST_REQUIRES_ARM_NEON;
  for (uint32_t channels = 9; channels < 16; channels++) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(25)
      .channels(channels)
      .qmax(128)
      .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mla8_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// Width of 3 pixels across channel counts 1, 8, 15, 22, 29, 36.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MLA8_LD64, multipixel) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(25)
      .channels(channels)
      .width(3)
      .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mla8_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

// Same sweep as multipixel, additionally varying step over 2..25.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MLA8_LD64, multipixel_with_step) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    for (size_t step = 2; step <= 25; step++) {
      DWConvMicrokernelTester()
        .cr(8)
        .kr(25)
        .channels(channels)
        .width(3)
        .step(step)
        .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mla8_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
    }
  }
}

// Width of 5 pixels with output_stride(43), swept over channel counts
// 1, 8, 15, 22, 29, 36. The loop variable is passed to .channels() so that
// each iteration runs a different configuration; a hard-coded .channels(8)
// would repeat the identical case six times.
// NOTE(review): this file is generated (tools/generate-dwconv-test.py); the
// generator template presumably needs the same change - confirm.
TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MLA8_LD64, multipixel_with_output_stride) {
  TEST_REQUIRES_ARM_NEON;
  for (size_t channels = 1; channels <= 40; channels += 7) {
    DWConvMicrokernelTester()
      .cr(8)
      .kr(25)
      .channels(channels)
      .width(5)
      .output_stride(43)
      .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mla8_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu);
  }
}

TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MLA8_LD64, multipixel_with_qmin) { 713 TEST_REQUIRES_ARM_NEON; 714 for (size_t channels = 1; channels <= 40; channels += 7) { 715 DWConvMicrokernelTester() 716 .cr(8) 717 .kr(25) 718 .channels(channels) 719 .width(3) 720 .qmin(128) 721 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mla8_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 722 } 723 } 724 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MLA8_LD64,multipixel_with_qmax)725 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MLA8_LD64, multipixel_with_qmax) { 726 TEST_REQUIRES_ARM_NEON; 727 for (size_t channels = 1; channels <= 40; channels += 7) { 728 DWConvMicrokernelTester() 729 .cr(8) 730 .kr(25) 731 .channels(channels) 732 .width(3) 733 .qmax(128) 734 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mla8_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 735 } 736 } 737 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MLA8_LD64,input_offset)738 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MLA8_LD64, input_offset) { 739 TEST_REQUIRES_ARM_NEON; 740 for (uint32_t channels = 16; channels < 128; channels += 24) { 741 DWConvMicrokernelTester() 742 .cr(8) 743 .kr(25) 744 .channels(channels) 745 .input_offset(176) 746 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mla8_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 747 } 748 } 749 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MLA8_LD64,zero)750 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MLA8_LD64, zero) { 751 TEST_REQUIRES_ARM_NEON; 752 for (uint32_t mz = 0; mz < 25; mz++) { 753 for (uint32_t channels = 16; channels < 128; channels += 24) { 754 DWConvMicrokernelTester() 755 .cr(8) 756 .kr(25) 757 .channels(channels) 758 .input_offset(176) 759 .zero_index(mz) 760 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mla8_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 761 } 762 } 763 } 764 #endif // XNN_ARCH_ARM || 
XNN_ARCH_ARM64 765 766 767 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL8_LD64,c_eq_8)768 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL8_LD64, c_eq_8) { 769 TEST_REQUIRES_ARM_NEON; 770 DWConvMicrokernelTester() 771 .cr(8) 772 .kr(25) 773 .channels(8) 774 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul8_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 775 } 776 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL8_LD64,c_div_8)777 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL8_LD64, c_div_8) { 778 TEST_REQUIRES_ARM_NEON; 779 for (uint32_t channels = 16; channels < 128; channels += 24) { 780 DWConvMicrokernelTester() 781 .cr(8) 782 .kr(25) 783 .channels(channels) 784 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul8_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 785 } 786 } 787 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL8_LD64,c_div_8_with_qmin)788 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL8_LD64, c_div_8_with_qmin) { 789 TEST_REQUIRES_ARM_NEON; 790 for (uint32_t channels = 16; channels < 128; channels += 24) { 791 DWConvMicrokernelTester() 792 .cr(8) 793 .kr(25) 794 .channels(channels) 795 .qmin(128) 796 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul8_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 797 } 798 } 799 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL8_LD64,c_div_8_with_qmax)800 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL8_LD64, c_div_8_with_qmax) { 801 TEST_REQUIRES_ARM_NEON; 802 for (uint32_t channels = 16; channels < 128; channels += 24) { 803 DWConvMicrokernelTester() 804 .cr(8) 805 .kr(25) 806 .channels(channels) 807 .qmax(128) 808 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul8_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 809 } 810 } 811 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL8_LD64,c_lt_8)812 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL8_LD64, 
c_lt_8) { 813 TEST_REQUIRES_ARM_NEON; 814 for (uint32_t channels = 1; channels < 8; channels++) { 815 DWConvMicrokernelTester() 816 .cr(8) 817 .kr(25) 818 .channels(channels) 819 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul8_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 820 } 821 } 822 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL8_LD64,c_gt_8)823 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL8_LD64, c_gt_8) { 824 TEST_REQUIRES_ARM_NEON; 825 for (uint32_t channels = 9; channels < 16; channels++) { 826 DWConvMicrokernelTester() 827 .cr(8) 828 .kr(25) 829 .channels(channels) 830 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul8_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 831 } 832 } 833 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL8_LD64,c_gt_8_with_qmin)834 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL8_LD64, c_gt_8_with_qmin) { 835 TEST_REQUIRES_ARM_NEON; 836 for (uint32_t channels = 9; channels < 16; channels++) { 837 DWConvMicrokernelTester() 838 .cr(8) 839 .kr(25) 840 .channels(channels) 841 .qmin(128) 842 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul8_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 843 } 844 } 845 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL8_LD64,c_gt_8_with_qmax)846 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL8_LD64, c_gt_8_with_qmax) { 847 TEST_REQUIRES_ARM_NEON; 848 for (uint32_t channels = 9; channels < 16; channels++) { 849 DWConvMicrokernelTester() 850 .cr(8) 851 .kr(25) 852 .channels(channels) 853 .qmax(128) 854 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul8_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 855 } 856 } 857 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL8_LD64,multipixel)858 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL8_LD64, multipixel) { 859 TEST_REQUIRES_ARM_NEON; 860 for (size_t channels = 1; channels <= 40; channels += 7) { 861 
DWConvMicrokernelTester() 862 .cr(8) 863 .kr(25) 864 .channels(channels) 865 .width(3) 866 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul8_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 867 } 868 } 869 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL8_LD64,multipixel_with_step)870 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL8_LD64, multipixel_with_step) { 871 TEST_REQUIRES_ARM_NEON; 872 for (size_t channels = 1; channels <= 40; channels += 7) { 873 for (size_t step = 2; step <= 25; step++) { 874 DWConvMicrokernelTester() 875 .cr(8) 876 .kr(25) 877 .channels(channels) 878 .width(3) 879 .step(step) 880 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul8_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 881 } 882 } 883 } 884 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL8_LD64,multipixel_with_output_stride)885 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL8_LD64, multipixel_with_output_stride) { 886 TEST_REQUIRES_ARM_NEON; 887 for (size_t channels = 1; channels <= 40; channels += 7) { 888 DWConvMicrokernelTester() 889 .cr(8) 890 .kr(25) 891 .channels(8) 892 .width(5) 893 .output_stride(43) 894 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul8_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 895 } 896 } 897 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL8_LD64,multipixel_with_qmin)898 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL8_LD64, multipixel_with_qmin) { 899 TEST_REQUIRES_ARM_NEON; 900 for (size_t channels = 1; channels <= 40; channels += 7) { 901 DWConvMicrokernelTester() 902 .cr(8) 903 .kr(25) 904 .channels(channels) 905 .width(3) 906 .qmin(128) 907 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul8_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 908 } 909 } 910 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL8_LD64,multipixel_with_qmax)911 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL8_LD64, multipixel_with_qmax) { 912 
TEST_REQUIRES_ARM_NEON; 913 for (size_t channels = 1; channels <= 40; channels += 7) { 914 DWConvMicrokernelTester() 915 .cr(8) 916 .kr(25) 917 .channels(channels) 918 .width(3) 919 .qmax(128) 920 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul8_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 921 } 922 } 923 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL8_LD64,input_offset)924 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL8_LD64, input_offset) { 925 TEST_REQUIRES_ARM_NEON; 926 for (uint32_t channels = 16; channels < 128; channels += 24) { 927 DWConvMicrokernelTester() 928 .cr(8) 929 .kr(25) 930 .channels(channels) 931 .input_offset(176) 932 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul8_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 933 } 934 } 935 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL8_LD64,zero)936 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL8_LD64, zero) { 937 TEST_REQUIRES_ARM_NEON; 938 for (uint32_t mz = 0; mz < 25; mz++) { 939 for (uint32_t channels = 16; channels < 128; channels += 24) { 940 DWConvMicrokernelTester() 941 .cr(8) 942 .kr(25) 943 .channels(channels) 944 .input_offset(176) 945 .zero_index(mz) 946 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul8_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 947 } 948 } 949 } 950 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 951 952 953 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL16,c_eq_8)954 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL16, c_eq_8) { 955 TEST_REQUIRES_ARM_NEON; 956 DWConvMicrokernelTester() 957 .cr(8) 958 .kr(25) 959 .channels(8) 960 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul16, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 961 } 962 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL16,c_div_8)963 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL16, c_div_8) { 964 TEST_REQUIRES_ARM_NEON; 965 for (uint32_t 
channels = 16; channels < 128; channels += 24) { 966 DWConvMicrokernelTester() 967 .cr(8) 968 .kr(25) 969 .channels(channels) 970 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul16, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 971 } 972 } 973 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL16,c_div_8_with_qmin)974 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL16, c_div_8_with_qmin) { 975 TEST_REQUIRES_ARM_NEON; 976 for (uint32_t channels = 16; channels < 128; channels += 24) { 977 DWConvMicrokernelTester() 978 .cr(8) 979 .kr(25) 980 .channels(channels) 981 .qmin(128) 982 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul16, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 983 } 984 } 985 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL16,c_div_8_with_qmax)986 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL16, c_div_8_with_qmax) { 987 TEST_REQUIRES_ARM_NEON; 988 for (uint32_t channels = 16; channels < 128; channels += 24) { 989 DWConvMicrokernelTester() 990 .cr(8) 991 .kr(25) 992 .channels(channels) 993 .qmax(128) 994 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul16, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 995 } 996 } 997 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL16,c_lt_8)998 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL16, c_lt_8) { 999 TEST_REQUIRES_ARM_NEON; 1000 for (uint32_t channels = 1; channels < 8; channels++) { 1001 DWConvMicrokernelTester() 1002 .cr(8) 1003 .kr(25) 1004 .channels(channels) 1005 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul16, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1006 } 1007 } 1008 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL16,c_gt_8)1009 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL16, c_gt_8) { 1010 TEST_REQUIRES_ARM_NEON; 1011 for (uint32_t channels = 9; channels < 16; channels++) { 1012 DWConvMicrokernelTester() 1013 .cr(8) 1014 .kr(25) 1015 .channels(channels) 1016 
.Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul16, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1017 } 1018 } 1019 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL16,c_gt_8_with_qmin)1020 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL16, c_gt_8_with_qmin) { 1021 TEST_REQUIRES_ARM_NEON; 1022 for (uint32_t channels = 9; channels < 16; channels++) { 1023 DWConvMicrokernelTester() 1024 .cr(8) 1025 .kr(25) 1026 .channels(channels) 1027 .qmin(128) 1028 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul16, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1029 } 1030 } 1031 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL16,c_gt_8_with_qmax)1032 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL16, c_gt_8_with_qmax) { 1033 TEST_REQUIRES_ARM_NEON; 1034 for (uint32_t channels = 9; channels < 16; channels++) { 1035 DWConvMicrokernelTester() 1036 .cr(8) 1037 .kr(25) 1038 .channels(channels) 1039 .qmax(128) 1040 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul16, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1041 } 1042 } 1043 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL16,multipixel)1044 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL16, multipixel) { 1045 TEST_REQUIRES_ARM_NEON; 1046 for (size_t channels = 1; channels <= 40; channels += 7) { 1047 DWConvMicrokernelTester() 1048 .cr(8) 1049 .kr(25) 1050 .channels(channels) 1051 .width(3) 1052 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul16, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1053 } 1054 } 1055 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL16,multipixel_with_step)1056 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL16, multipixel_with_step) { 1057 TEST_REQUIRES_ARM_NEON; 1058 for (size_t channels = 1; channels <= 40; channels += 7) { 1059 for (size_t step = 2; step <= 25; step++) { 1060 DWConvMicrokernelTester() 1061 .cr(8) 1062 .kr(25) 1063 .channels(channels) 1064 .width(3) 1065 .step(step) 1066 
.Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul16, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1067 } 1068 } 1069 } 1070 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL16,multipixel_with_output_stride)1071 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL16, multipixel_with_output_stride) { 1072 TEST_REQUIRES_ARM_NEON; 1073 for (size_t channels = 1; channels <= 40; channels += 7) { 1074 DWConvMicrokernelTester() 1075 .cr(8) 1076 .kr(25) 1077 .channels(8) 1078 .width(5) 1079 .output_stride(43) 1080 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul16, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1081 } 1082 } 1083 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL16,multipixel_with_qmin)1084 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL16, multipixel_with_qmin) { 1085 TEST_REQUIRES_ARM_NEON; 1086 for (size_t channels = 1; channels <= 40; channels += 7) { 1087 DWConvMicrokernelTester() 1088 .cr(8) 1089 .kr(25) 1090 .channels(channels) 1091 .width(3) 1092 .qmin(128) 1093 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul16, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1094 } 1095 } 1096 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL16,multipixel_with_qmax)1097 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL16, multipixel_with_qmax) { 1098 TEST_REQUIRES_ARM_NEON; 1099 for (size_t channels = 1; channels <= 40; channels += 7) { 1100 DWConvMicrokernelTester() 1101 .cr(8) 1102 .kr(25) 1103 .channels(channels) 1104 .width(3) 1105 .qmax(128) 1106 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul16, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1107 } 1108 } 1109 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL16,input_offset)1110 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL16, input_offset) { 1111 TEST_REQUIRES_ARM_NEON; 1112 for (uint32_t channels = 16; channels < 128; channels += 24) { 1113 DWConvMicrokernelTester() 1114 .cr(8) 1115 .kr(25) 1116 
.channels(channels) 1117 .input_offset(176) 1118 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul16, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1119 } 1120 } 1121 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL16,zero)1122 TEST(QS8_DWCONV_MINMAX_RNDNU_UP8X25__NEON_MUL16, zero) { 1123 TEST_REQUIRES_ARM_NEON; 1124 for (uint32_t mz = 0; mz < 25; mz++) { 1125 for (uint32_t channels = 16; channels < 128; channels += 24) { 1126 DWConvMicrokernelTester() 1127 .cr(8) 1128 .kr(25) 1129 .channels(channels) 1130 .input_offset(176) 1131 .zero_index(mz) 1132 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up8x25__neon_mul16, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1133 } 1134 } 1135 } 1136 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 1137 1138 1139 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MLA8_LD64,c_eq_16)1140 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MLA8_LD64, c_eq_16) { 1141 TEST_REQUIRES_ARM_NEON; 1142 DWConvMicrokernelTester() 1143 .cr(16) 1144 .kr(9) 1145 .channels(16) 1146 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mla8_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1147 } 1148 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MLA8_LD64,c_div_16)1149 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MLA8_LD64, c_div_16) { 1150 TEST_REQUIRES_ARM_NEON; 1151 for (uint32_t channels = 32; channels < 256; channels += 48) { 1152 DWConvMicrokernelTester() 1153 .cr(16) 1154 .kr(9) 1155 .channels(channels) 1156 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mla8_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1157 } 1158 } 1159 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MLA8_LD64,c_div_16_with_qmin)1160 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MLA8_LD64, c_div_16_with_qmin) { 1161 TEST_REQUIRES_ARM_NEON; 1162 for (uint32_t channels = 32; channels < 256; channels += 48) { 1163 DWConvMicrokernelTester() 1164 .cr(16) 1165 .kr(9) 
1166 .channels(channels) 1167 .qmin(128) 1168 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mla8_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1169 } 1170 } 1171 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MLA8_LD64,c_div_16_with_qmax)1172 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MLA8_LD64, c_div_16_with_qmax) { 1173 TEST_REQUIRES_ARM_NEON; 1174 for (uint32_t channels = 32; channels < 256; channels += 48) { 1175 DWConvMicrokernelTester() 1176 .cr(16) 1177 .kr(9) 1178 .channels(channels) 1179 .qmax(128) 1180 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mla8_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1181 } 1182 } 1183 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MLA8_LD64,c_lt_16)1184 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MLA8_LD64, c_lt_16) { 1185 TEST_REQUIRES_ARM_NEON; 1186 for (uint32_t channels = 1; channels < 16; channels++) { 1187 DWConvMicrokernelTester() 1188 .cr(16) 1189 .kr(9) 1190 .channels(channels) 1191 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mla8_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1192 } 1193 } 1194 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MLA8_LD64,c_gt_16)1195 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MLA8_LD64, c_gt_16) { 1196 TEST_REQUIRES_ARM_NEON; 1197 for (uint32_t channels = 17; channels < 32; channels++) { 1198 DWConvMicrokernelTester() 1199 .cr(16) 1200 .kr(9) 1201 .channels(channels) 1202 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mla8_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1203 } 1204 } 1205 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MLA8_LD64,c_gt_16_with_qmin)1206 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MLA8_LD64, c_gt_16_with_qmin) { 1207 TEST_REQUIRES_ARM_NEON; 1208 for (uint32_t channels = 17; channels < 32; channels++) { 1209 DWConvMicrokernelTester() 1210 .cr(16) 1211 .kr(9) 1212 .channels(channels) 1213 .qmin(128) 1214 
.Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mla8_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1215 } 1216 } 1217 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MLA8_LD64,c_gt_16_with_qmax)1218 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MLA8_LD64, c_gt_16_with_qmax) { 1219 TEST_REQUIRES_ARM_NEON; 1220 for (uint32_t channels = 17; channels < 32; channels++) { 1221 DWConvMicrokernelTester() 1222 .cr(16) 1223 .kr(9) 1224 .channels(channels) 1225 .qmax(128) 1226 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mla8_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1227 } 1228 } 1229 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MLA8_LD64,multipixel)1230 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MLA8_LD64, multipixel) { 1231 TEST_REQUIRES_ARM_NEON; 1232 for (size_t channels = 1; channels <= 80; channels += 15) { 1233 DWConvMicrokernelTester() 1234 .cr(16) 1235 .kr(9) 1236 .channels(channels) 1237 .width(3) 1238 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mla8_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1239 } 1240 } 1241 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MLA8_LD64,multipixel_with_step)1242 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MLA8_LD64, multipixel_with_step) { 1243 TEST_REQUIRES_ARM_NEON; 1244 for (size_t channels = 1; channels <= 80; channels += 15) { 1245 for (size_t step = 2; step <= 9; step++) { 1246 DWConvMicrokernelTester() 1247 .cr(16) 1248 .kr(9) 1249 .channels(channels) 1250 .width(3) 1251 .step(step) 1252 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mla8_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1253 } 1254 } 1255 } 1256 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MLA8_LD64,multipixel_with_output_stride)1257 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MLA8_LD64, multipixel_with_output_stride) { 1258 TEST_REQUIRES_ARM_NEON; 1259 for (size_t channels = 1; channels <= 80; channels += 15) { 1260 
DWConvMicrokernelTester() 1261 .cr(16) 1262 .kr(9) 1263 .channels(16) 1264 .width(5) 1265 .output_stride(83) 1266 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mla8_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1267 } 1268 } 1269 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MLA8_LD64,multipixel_with_qmin)1270 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MLA8_LD64, multipixel_with_qmin) { 1271 TEST_REQUIRES_ARM_NEON; 1272 for (size_t channels = 1; channels <= 80; channels += 15) { 1273 DWConvMicrokernelTester() 1274 .cr(16) 1275 .kr(9) 1276 .channels(channels) 1277 .width(3) 1278 .qmin(128) 1279 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mla8_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1280 } 1281 } 1282 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MLA8_LD64,multipixel_with_qmax)1283 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MLA8_LD64, multipixel_with_qmax) { 1284 TEST_REQUIRES_ARM_NEON; 1285 for (size_t channels = 1; channels <= 80; channels += 15) { 1286 DWConvMicrokernelTester() 1287 .cr(16) 1288 .kr(9) 1289 .channels(channels) 1290 .width(3) 1291 .qmax(128) 1292 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mla8_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1293 } 1294 } 1295 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MLA8_LD64,input_offset)1296 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MLA8_LD64, input_offset) { 1297 TEST_REQUIRES_ARM_NEON; 1298 for (uint32_t channels = 32; channels < 256; channels += 48) { 1299 DWConvMicrokernelTester() 1300 .cr(16) 1301 .kr(9) 1302 .channels(channels) 1303 .input_offset(304) 1304 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mla8_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1305 } 1306 } 1307 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MLA8_LD64,zero)1308 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MLA8_LD64, zero) { 1309 TEST_REQUIRES_ARM_NEON; 1310 for (uint32_t mz = 0; mz < 
9; mz++) { 1311 for (uint32_t channels = 32; channels < 256; channels += 48) { 1312 DWConvMicrokernelTester() 1313 .cr(16) 1314 .kr(9) 1315 .channels(channels) 1316 .input_offset(304) 1317 .zero_index(mz) 1318 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mla8_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1319 } 1320 } 1321 } 1322 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 1323 1324 1325 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MLA8_LD128,c_eq_16)1326 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MLA8_LD128, c_eq_16) { 1327 TEST_REQUIRES_ARM_NEON; 1328 DWConvMicrokernelTester() 1329 .cr(16) 1330 .kr(9) 1331 .channels(16) 1332 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mla8_ld128, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1333 } 1334 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MLA8_LD128,c_div_16)1335 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MLA8_LD128, c_div_16) { 1336 TEST_REQUIRES_ARM_NEON; 1337 for (uint32_t channels = 32; channels < 256; channels += 48) { 1338 DWConvMicrokernelTester() 1339 .cr(16) 1340 .kr(9) 1341 .channels(channels) 1342 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mla8_ld128, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1343 } 1344 } 1345 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MLA8_LD128,c_div_16_with_qmin)1346 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MLA8_LD128, c_div_16_with_qmin) { 1347 TEST_REQUIRES_ARM_NEON; 1348 for (uint32_t channels = 32; channels < 256; channels += 48) { 1349 DWConvMicrokernelTester() 1350 .cr(16) 1351 .kr(9) 1352 .channels(channels) 1353 .qmin(128) 1354 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mla8_ld128, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1355 } 1356 } 1357 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MLA8_LD128,c_div_16_with_qmax)1358 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MLA8_LD128, c_div_16_with_qmax) { 1359 
TEST_REQUIRES_ARM_NEON; 1360 for (uint32_t channels = 32; channels < 256; channels += 48) { 1361 DWConvMicrokernelTester() 1362 .cr(16) 1363 .kr(9) 1364 .channels(channels) 1365 .qmax(128) 1366 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mla8_ld128, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1367 } 1368 } 1369 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MLA8_LD128,c_lt_16)1370 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MLA8_LD128, c_lt_16) { 1371 TEST_REQUIRES_ARM_NEON; 1372 for (uint32_t channels = 1; channels < 16; channels++) { 1373 DWConvMicrokernelTester() 1374 .cr(16) 1375 .kr(9) 1376 .channels(channels) 1377 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mla8_ld128, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1378 } 1379 } 1380 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MLA8_LD128,c_gt_16)1381 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MLA8_LD128, c_gt_16) { 1382 TEST_REQUIRES_ARM_NEON; 1383 for (uint32_t channels = 17; channels < 32; channels++) { 1384 DWConvMicrokernelTester() 1385 .cr(16) 1386 .kr(9) 1387 .channels(channels) 1388 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mla8_ld128, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1389 } 1390 } 1391 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MLA8_LD128,c_gt_16_with_qmin)1392 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MLA8_LD128, c_gt_16_with_qmin) { 1393 TEST_REQUIRES_ARM_NEON; 1394 for (uint32_t channels = 17; channels < 32; channels++) { 1395 DWConvMicrokernelTester() 1396 .cr(16) 1397 .kr(9) 1398 .channels(channels) 1399 .qmin(128) 1400 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mla8_ld128, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1401 } 1402 } 1403 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MLA8_LD128,c_gt_16_with_qmax)1404 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MLA8_LD128, c_gt_16_with_qmax) { 1405 TEST_REQUIRES_ARM_NEON; 1406 for (uint32_t channels = 17; 
channels < 32; channels++) { 1407 DWConvMicrokernelTester() 1408 .cr(16) 1409 .kr(9) 1410 .channels(channels) 1411 .qmax(128) 1412 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mla8_ld128, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1413 } 1414 } 1415 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MLA8_LD128,multipixel)1416 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MLA8_LD128, multipixel) { 1417 TEST_REQUIRES_ARM_NEON; 1418 for (size_t channels = 1; channels <= 80; channels += 15) { 1419 DWConvMicrokernelTester() 1420 .cr(16) 1421 .kr(9) 1422 .channels(channels) 1423 .width(3) 1424 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mla8_ld128, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1425 } 1426 } 1427 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MLA8_LD128,multipixel_with_step)1428 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MLA8_LD128, multipixel_with_step) { 1429 TEST_REQUIRES_ARM_NEON; 1430 for (size_t channels = 1; channels <= 80; channels += 15) { 1431 for (size_t step = 2; step <= 9; step++) { 1432 DWConvMicrokernelTester() 1433 .cr(16) 1434 .kr(9) 1435 .channels(channels) 1436 .width(3) 1437 .step(step) 1438 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mla8_ld128, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1439 } 1440 } 1441 } 1442 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MLA8_LD128,multipixel_with_output_stride)1443 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MLA8_LD128, multipixel_with_output_stride) { 1444 TEST_REQUIRES_ARM_NEON; 1445 for (size_t channels = 1; channels <= 80; channels += 15) { 1446 DWConvMicrokernelTester() 1447 .cr(16) 1448 .kr(9) 1449 .channels(16) 1450 .width(5) 1451 .output_stride(83) 1452 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mla8_ld128, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1453 } 1454 } 1455 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MLA8_LD128,multipixel_with_qmin)1456 
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MLA8_LD128, multipixel_with_qmin) { 1457 TEST_REQUIRES_ARM_NEON; 1458 for (size_t channels = 1; channels <= 80; channels += 15) { 1459 DWConvMicrokernelTester() 1460 .cr(16) 1461 .kr(9) 1462 .channels(channels) 1463 .width(3) 1464 .qmin(128) 1465 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mla8_ld128, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1466 } 1467 } 1468 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MLA8_LD128,multipixel_with_qmax)1469 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MLA8_LD128, multipixel_with_qmax) { 1470 TEST_REQUIRES_ARM_NEON; 1471 for (size_t channels = 1; channels <= 80; channels += 15) { 1472 DWConvMicrokernelTester() 1473 .cr(16) 1474 .kr(9) 1475 .channels(channels) 1476 .width(3) 1477 .qmax(128) 1478 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mla8_ld128, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1479 } 1480 } 1481 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MLA8_LD128,input_offset)1482 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MLA8_LD128, input_offset) { 1483 TEST_REQUIRES_ARM_NEON; 1484 for (uint32_t channels = 32; channels < 256; channels += 48) { 1485 DWConvMicrokernelTester() 1486 .cr(16) 1487 .kr(9) 1488 .channels(channels) 1489 .input_offset(304) 1490 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mla8_ld128, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1491 } 1492 } 1493 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MLA8_LD128,zero)1494 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MLA8_LD128, zero) { 1495 TEST_REQUIRES_ARM_NEON; 1496 for (uint32_t mz = 0; mz < 9; mz++) { 1497 for (uint32_t channels = 32; channels < 256; channels += 48) { 1498 DWConvMicrokernelTester() 1499 .cr(16) 1500 .kr(9) 1501 .channels(channels) 1502 .input_offset(304) 1503 .zero_index(mz) 1504 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mla8_ld128, xnn_init_qs8_conv_minmax_rndnu_neon_params, 
xnn_qs8_requantize_rndnu); 1505 } 1506 } 1507 } 1508 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 1509 1510 1511 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL8_LD64,c_eq_16)1512 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL8_LD64, c_eq_16) { 1513 TEST_REQUIRES_ARM_NEON; 1514 DWConvMicrokernelTester() 1515 .cr(16) 1516 .kr(9) 1517 .channels(16) 1518 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul8_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1519 } 1520 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL8_LD64,c_div_16)1521 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL8_LD64, c_div_16) { 1522 TEST_REQUIRES_ARM_NEON; 1523 for (uint32_t channels = 32; channels < 256; channels += 48) { 1524 DWConvMicrokernelTester() 1525 .cr(16) 1526 .kr(9) 1527 .channels(channels) 1528 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul8_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1529 } 1530 } 1531 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL8_LD64,c_div_16_with_qmin)1532 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL8_LD64, c_div_16_with_qmin) { 1533 TEST_REQUIRES_ARM_NEON; 1534 for (uint32_t channels = 32; channels < 256; channels += 48) { 1535 DWConvMicrokernelTester() 1536 .cr(16) 1537 .kr(9) 1538 .channels(channels) 1539 .qmin(128) 1540 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul8_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1541 } 1542 } 1543 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL8_LD64,c_div_16_with_qmax)1544 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL8_LD64, c_div_16_with_qmax) { 1545 TEST_REQUIRES_ARM_NEON; 1546 for (uint32_t channels = 32; channels < 256; channels += 48) { 1547 DWConvMicrokernelTester() 1548 .cr(16) 1549 .kr(9) 1550 .channels(channels) 1551 .qmax(128) 1552 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul8_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1553 
} 1554 } 1555 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL8_LD64,c_lt_16)1556 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL8_LD64, c_lt_16) { 1557 TEST_REQUIRES_ARM_NEON; 1558 for (uint32_t channels = 1; channels < 16; channels++) { 1559 DWConvMicrokernelTester() 1560 .cr(16) 1561 .kr(9) 1562 .channels(channels) 1563 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul8_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1564 } 1565 } 1566 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL8_LD64,c_gt_16)1567 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL8_LD64, c_gt_16) { 1568 TEST_REQUIRES_ARM_NEON; 1569 for (uint32_t channels = 17; channels < 32; channels++) { 1570 DWConvMicrokernelTester() 1571 .cr(16) 1572 .kr(9) 1573 .channels(channels) 1574 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul8_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1575 } 1576 } 1577 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL8_LD64,c_gt_16_with_qmin)1578 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL8_LD64, c_gt_16_with_qmin) { 1579 TEST_REQUIRES_ARM_NEON; 1580 for (uint32_t channels = 17; channels < 32; channels++) { 1581 DWConvMicrokernelTester() 1582 .cr(16) 1583 .kr(9) 1584 .channels(channels) 1585 .qmin(128) 1586 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul8_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1587 } 1588 } 1589 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL8_LD64,c_gt_16_with_qmax)1590 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL8_LD64, c_gt_16_with_qmax) { 1591 TEST_REQUIRES_ARM_NEON; 1592 for (uint32_t channels = 17; channels < 32; channels++) { 1593 DWConvMicrokernelTester() 1594 .cr(16) 1595 .kr(9) 1596 .channels(channels) 1597 .qmax(128) 1598 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul8_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1599 } 1600 } 1601 
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL8_LD64,multipixel)1602 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL8_LD64, multipixel) { 1603 TEST_REQUIRES_ARM_NEON; 1604 for (size_t channels = 1; channels <= 80; channels += 15) { 1605 DWConvMicrokernelTester() 1606 .cr(16) 1607 .kr(9) 1608 .channels(channels) 1609 .width(3) 1610 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul8_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1611 } 1612 } 1613 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL8_LD64,multipixel_with_step)1614 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL8_LD64, multipixel_with_step) { 1615 TEST_REQUIRES_ARM_NEON; 1616 for (size_t channels = 1; channels <= 80; channels += 15) { 1617 for (size_t step = 2; step <= 9; step++) { 1618 DWConvMicrokernelTester() 1619 .cr(16) 1620 .kr(9) 1621 .channels(channels) 1622 .width(3) 1623 .step(step) 1624 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul8_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1625 } 1626 } 1627 } 1628 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL8_LD64,multipixel_with_output_stride)1629 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL8_LD64, multipixel_with_output_stride) { 1630 TEST_REQUIRES_ARM_NEON; 1631 for (size_t channels = 1; channels <= 80; channels += 15) { 1632 DWConvMicrokernelTester() 1633 .cr(16) 1634 .kr(9) 1635 .channels(16) 1636 .width(5) 1637 .output_stride(83) 1638 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul8_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1639 } 1640 } 1641 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL8_LD64,multipixel_with_qmin)1642 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL8_LD64, multipixel_with_qmin) { 1643 TEST_REQUIRES_ARM_NEON; 1644 for (size_t channels = 1; channels <= 80; channels += 15) { 1645 DWConvMicrokernelTester() 1646 .cr(16) 1647 .kr(9) 1648 .channels(channels) 1649 .width(3) 1650 .qmin(128) 1651 
.Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul8_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1652 } 1653 } 1654 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL8_LD64,multipixel_with_qmax)1655 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL8_LD64, multipixel_with_qmax) { 1656 TEST_REQUIRES_ARM_NEON; 1657 for (size_t channels = 1; channels <= 80; channels += 15) { 1658 DWConvMicrokernelTester() 1659 .cr(16) 1660 .kr(9) 1661 .channels(channels) 1662 .width(3) 1663 .qmax(128) 1664 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul8_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1665 } 1666 } 1667 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL8_LD64,input_offset)1668 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL8_LD64, input_offset) { 1669 TEST_REQUIRES_ARM_NEON; 1670 for (uint32_t channels = 32; channels < 256; channels += 48) { 1671 DWConvMicrokernelTester() 1672 .cr(16) 1673 .kr(9) 1674 .channels(channels) 1675 .input_offset(304) 1676 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul8_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1677 } 1678 } 1679 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL8_LD64,zero)1680 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL8_LD64, zero) { 1681 TEST_REQUIRES_ARM_NEON; 1682 for (uint32_t mz = 0; mz < 9; mz++) { 1683 for (uint32_t channels = 32; channels < 256; channels += 48) { 1684 DWConvMicrokernelTester() 1685 .cr(16) 1686 .kr(9) 1687 .channels(channels) 1688 .input_offset(304) 1689 .zero_index(mz) 1690 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul8_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1691 } 1692 } 1693 } 1694 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 1695 1696 1697 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL8_LD128,c_eq_16)1698 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL8_LD128, c_eq_16) { 1699 TEST_REQUIRES_ARM_NEON; 1700 
DWConvMicrokernelTester() 1701 .cr(16) 1702 .kr(9) 1703 .channels(16) 1704 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul8_ld128, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1705 } 1706 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL8_LD128,c_div_16)1707 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL8_LD128, c_div_16) { 1708 TEST_REQUIRES_ARM_NEON; 1709 for (uint32_t channels = 32; channels < 256; channels += 48) { 1710 DWConvMicrokernelTester() 1711 .cr(16) 1712 .kr(9) 1713 .channels(channels) 1714 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul8_ld128, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1715 } 1716 } 1717 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL8_LD128,c_div_16_with_qmin)1718 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL8_LD128, c_div_16_with_qmin) { 1719 TEST_REQUIRES_ARM_NEON; 1720 for (uint32_t channels = 32; channels < 256; channels += 48) { 1721 DWConvMicrokernelTester() 1722 .cr(16) 1723 .kr(9) 1724 .channels(channels) 1725 .qmin(128) 1726 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul8_ld128, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1727 } 1728 } 1729 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL8_LD128,c_div_16_with_qmax)1730 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL8_LD128, c_div_16_with_qmax) { 1731 TEST_REQUIRES_ARM_NEON; 1732 for (uint32_t channels = 32; channels < 256; channels += 48) { 1733 DWConvMicrokernelTester() 1734 .cr(16) 1735 .kr(9) 1736 .channels(channels) 1737 .qmax(128) 1738 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul8_ld128, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1739 } 1740 } 1741 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL8_LD128,c_lt_16)1742 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL8_LD128, c_lt_16) { 1743 TEST_REQUIRES_ARM_NEON; 1744 for (uint32_t channels = 1; channels < 16; channels++) { 1745 DWConvMicrokernelTester() 1746 .cr(16) 1747 .kr(9) 1748 
.channels(channels) 1749 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul8_ld128, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1750 } 1751 } 1752 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL8_LD128,c_gt_16)1753 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL8_LD128, c_gt_16) { 1754 TEST_REQUIRES_ARM_NEON; 1755 for (uint32_t channels = 17; channels < 32; channels++) { 1756 DWConvMicrokernelTester() 1757 .cr(16) 1758 .kr(9) 1759 .channels(channels) 1760 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul8_ld128, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1761 } 1762 } 1763 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL8_LD128,c_gt_16_with_qmin)1764 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL8_LD128, c_gt_16_with_qmin) { 1765 TEST_REQUIRES_ARM_NEON; 1766 for (uint32_t channels = 17; channels < 32; channels++) { 1767 DWConvMicrokernelTester() 1768 .cr(16) 1769 .kr(9) 1770 .channels(channels) 1771 .qmin(128) 1772 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul8_ld128, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1773 } 1774 } 1775 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL8_LD128,c_gt_16_with_qmax)1776 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL8_LD128, c_gt_16_with_qmax) { 1777 TEST_REQUIRES_ARM_NEON; 1778 for (uint32_t channels = 17; channels < 32; channels++) { 1779 DWConvMicrokernelTester() 1780 .cr(16) 1781 .kr(9) 1782 .channels(channels) 1783 .qmax(128) 1784 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul8_ld128, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1785 } 1786 } 1787 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL8_LD128,multipixel)1788 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL8_LD128, multipixel) { 1789 TEST_REQUIRES_ARM_NEON; 1790 for (size_t channels = 1; channels <= 80; channels += 15) { 1791 DWConvMicrokernelTester() 1792 .cr(16) 1793 .kr(9) 1794 .channels(channels) 1795 .width(3) 1796 
.Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul8_ld128, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1797 } 1798 } 1799 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL8_LD128,multipixel_with_step)1800 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL8_LD128, multipixel_with_step) { 1801 TEST_REQUIRES_ARM_NEON; 1802 for (size_t channels = 1; channels <= 80; channels += 15) { 1803 for (size_t step = 2; step <= 9; step++) { 1804 DWConvMicrokernelTester() 1805 .cr(16) 1806 .kr(9) 1807 .channels(channels) 1808 .width(3) 1809 .step(step) 1810 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul8_ld128, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1811 } 1812 } 1813 } 1814 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL8_LD128,multipixel_with_output_stride)1815 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL8_LD128, multipixel_with_output_stride) { 1816 TEST_REQUIRES_ARM_NEON; 1817 for (size_t channels = 1; channels <= 80; channels += 15) { 1818 DWConvMicrokernelTester() 1819 .cr(16) 1820 .kr(9) 1821 .channels(16) 1822 .width(5) 1823 .output_stride(83) 1824 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul8_ld128, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1825 } 1826 } 1827 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL8_LD128,multipixel_with_qmin)1828 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL8_LD128, multipixel_with_qmin) { 1829 TEST_REQUIRES_ARM_NEON; 1830 for (size_t channels = 1; channels <= 80; channels += 15) { 1831 DWConvMicrokernelTester() 1832 .cr(16) 1833 .kr(9) 1834 .channels(channels) 1835 .width(3) 1836 .qmin(128) 1837 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul8_ld128, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1838 } 1839 } 1840 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL8_LD128,multipixel_with_qmax)1841 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL8_LD128, multipixel_with_qmax) { 1842 TEST_REQUIRES_ARM_NEON; 1843 
for (size_t channels = 1; channels <= 80; channels += 15) { 1844 DWConvMicrokernelTester() 1845 .cr(16) 1846 .kr(9) 1847 .channels(channels) 1848 .width(3) 1849 .qmax(128) 1850 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul8_ld128, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1851 } 1852 } 1853 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL8_LD128,input_offset)1854 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL8_LD128, input_offset) { 1855 TEST_REQUIRES_ARM_NEON; 1856 for (uint32_t channels = 32; channels < 256; channels += 48) { 1857 DWConvMicrokernelTester() 1858 .cr(16) 1859 .kr(9) 1860 .channels(channels) 1861 .input_offset(304) 1862 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul8_ld128, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1863 } 1864 } 1865 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL8_LD128,zero)1866 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL8_LD128, zero) { 1867 TEST_REQUIRES_ARM_NEON; 1868 for (uint32_t mz = 0; mz < 9; mz++) { 1869 for (uint32_t channels = 32; channels < 256; channels += 48) { 1870 DWConvMicrokernelTester() 1871 .cr(16) 1872 .kr(9) 1873 .channels(channels) 1874 .input_offset(304) 1875 .zero_index(mz) 1876 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul8_ld128, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1877 } 1878 } 1879 } 1880 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 1881 1882 1883 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL16,c_eq_16)1884 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL16, c_eq_16) { 1885 TEST_REQUIRES_ARM_NEON; 1886 DWConvMicrokernelTester() 1887 .cr(16) 1888 .kr(9) 1889 .channels(16) 1890 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul16, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1891 } 1892 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL16,c_div_16)1893 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL16, c_div_16) { 1894 
TEST_REQUIRES_ARM_NEON; 1895 for (uint32_t channels = 32; channels < 256; channels += 48) { 1896 DWConvMicrokernelTester() 1897 .cr(16) 1898 .kr(9) 1899 .channels(channels) 1900 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul16, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1901 } 1902 } 1903 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL16,c_div_16_with_qmin)1904 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL16, c_div_16_with_qmin) { 1905 TEST_REQUIRES_ARM_NEON; 1906 for (uint32_t channels = 32; channels < 256; channels += 48) { 1907 DWConvMicrokernelTester() 1908 .cr(16) 1909 .kr(9) 1910 .channels(channels) 1911 .qmin(128) 1912 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul16, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1913 } 1914 } 1915 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL16,c_div_16_with_qmax)1916 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL16, c_div_16_with_qmax) { 1917 TEST_REQUIRES_ARM_NEON; 1918 for (uint32_t channels = 32; channels < 256; channels += 48) { 1919 DWConvMicrokernelTester() 1920 .cr(16) 1921 .kr(9) 1922 .channels(channels) 1923 .qmax(128) 1924 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul16, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1925 } 1926 } 1927 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL16,c_lt_16)1928 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL16, c_lt_16) { 1929 TEST_REQUIRES_ARM_NEON; 1930 for (uint32_t channels = 1; channels < 16; channels++) { 1931 DWConvMicrokernelTester() 1932 .cr(16) 1933 .kr(9) 1934 .channels(channels) 1935 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul16, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1936 } 1937 } 1938 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL16,c_gt_16)1939 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL16, c_gt_16) { 1940 TEST_REQUIRES_ARM_NEON; 1941 for (uint32_t channels = 17; channels < 32; channels++) { 1942 
DWConvMicrokernelTester() 1943 .cr(16) 1944 .kr(9) 1945 .channels(channels) 1946 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul16, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1947 } 1948 } 1949 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL16,c_gt_16_with_qmin)1950 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL16, c_gt_16_with_qmin) { 1951 TEST_REQUIRES_ARM_NEON; 1952 for (uint32_t channels = 17; channels < 32; channels++) { 1953 DWConvMicrokernelTester() 1954 .cr(16) 1955 .kr(9) 1956 .channels(channels) 1957 .qmin(128) 1958 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul16, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1959 } 1960 } 1961 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL16,c_gt_16_with_qmax)1962 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL16, c_gt_16_with_qmax) { 1963 TEST_REQUIRES_ARM_NEON; 1964 for (uint32_t channels = 17; channels < 32; channels++) { 1965 DWConvMicrokernelTester() 1966 .cr(16) 1967 .kr(9) 1968 .channels(channels) 1969 .qmax(128) 1970 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul16, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1971 } 1972 } 1973 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL16,multipixel)1974 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL16, multipixel) { 1975 TEST_REQUIRES_ARM_NEON; 1976 for (size_t channels = 1; channels <= 80; channels += 15) { 1977 DWConvMicrokernelTester() 1978 .cr(16) 1979 .kr(9) 1980 .channels(channels) 1981 .width(3) 1982 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul16, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1983 } 1984 } 1985 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL16,multipixel_with_step)1986 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL16, multipixel_with_step) { 1987 TEST_REQUIRES_ARM_NEON; 1988 for (size_t channels = 1; channels <= 80; channels += 15) { 1989 for (size_t step = 2; step <= 9; step++) { 1990 
DWConvMicrokernelTester() 1991 .cr(16) 1992 .kr(9) 1993 .channels(channels) 1994 .width(3) 1995 .step(step) 1996 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul16, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 1997 } 1998 } 1999 } 2000 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL16,multipixel_with_output_stride)2001 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL16, multipixel_with_output_stride) { 2002 TEST_REQUIRES_ARM_NEON; 2003 for (size_t channels = 1; channels <= 80; channels += 15) { 2004 DWConvMicrokernelTester() 2005 .cr(16) 2006 .kr(9) 2007 .channels(16) 2008 .width(5) 2009 .output_stride(83) 2010 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul16, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2011 } 2012 } 2013 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL16,multipixel_with_qmin)2014 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL16, multipixel_with_qmin) { 2015 TEST_REQUIRES_ARM_NEON; 2016 for (size_t channels = 1; channels <= 80; channels += 15) { 2017 DWConvMicrokernelTester() 2018 .cr(16) 2019 .kr(9) 2020 .channels(channels) 2021 .width(3) 2022 .qmin(128) 2023 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul16, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2024 } 2025 } 2026 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL16,multipixel_with_qmax)2027 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL16, multipixel_with_qmax) { 2028 TEST_REQUIRES_ARM_NEON; 2029 for (size_t channels = 1; channels <= 80; channels += 15) { 2030 DWConvMicrokernelTester() 2031 .cr(16) 2032 .kr(9) 2033 .channels(channels) 2034 .width(3) 2035 .qmax(128) 2036 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul16, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2037 } 2038 } 2039 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL16,input_offset)2040 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL16, input_offset) { 2041 TEST_REQUIRES_ARM_NEON; 2042 for 
(uint32_t channels = 32; channels < 256; channels += 48) { 2043 DWConvMicrokernelTester() 2044 .cr(16) 2045 .kr(9) 2046 .channels(channels) 2047 .input_offset(304) 2048 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul16, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2049 } 2050 } 2051 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL16,zero)2052 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X9__NEON_MUL16, zero) { 2053 TEST_REQUIRES_ARM_NEON; 2054 for (uint32_t mz = 0; mz < 9; mz++) { 2055 for (uint32_t channels = 32; channels < 256; channels += 48) { 2056 DWConvMicrokernelTester() 2057 .cr(16) 2058 .kr(9) 2059 .channels(channels) 2060 .input_offset(304) 2061 .zero_index(mz) 2062 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x9__neon_mul16, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2063 } 2064 } 2065 } 2066 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 2067 2068 2069 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MLA8_LD64,c_eq_16)2070 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MLA8_LD64, c_eq_16) { 2071 TEST_REQUIRES_ARM_NEON; 2072 DWConvMicrokernelTester() 2073 .cr(16) 2074 .kr(25) 2075 .channels(16) 2076 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mla8_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2077 } 2078 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MLA8_LD64,c_div_16)2079 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MLA8_LD64, c_div_16) { 2080 TEST_REQUIRES_ARM_NEON; 2081 for (uint32_t channels = 32; channels < 256; channels += 48) { 2082 DWConvMicrokernelTester() 2083 .cr(16) 2084 .kr(25) 2085 .channels(channels) 2086 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mla8_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2087 } 2088 } 2089 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MLA8_LD64,c_div_16_with_qmin)2090 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MLA8_LD64, c_div_16_with_qmin) { 2091 
TEST_REQUIRES_ARM_NEON; 2092 for (uint32_t channels = 32; channels < 256; channels += 48) { 2093 DWConvMicrokernelTester() 2094 .cr(16) 2095 .kr(25) 2096 .channels(channels) 2097 .qmin(128) 2098 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mla8_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2099 } 2100 } 2101 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MLA8_LD64,c_div_16_with_qmax)2102 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MLA8_LD64, c_div_16_with_qmax) { 2103 TEST_REQUIRES_ARM_NEON; 2104 for (uint32_t channels = 32; channels < 256; channels += 48) { 2105 DWConvMicrokernelTester() 2106 .cr(16) 2107 .kr(25) 2108 .channels(channels) 2109 .qmax(128) 2110 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mla8_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2111 } 2112 } 2113 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MLA8_LD64,c_lt_16)2114 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MLA8_LD64, c_lt_16) { 2115 TEST_REQUIRES_ARM_NEON; 2116 for (uint32_t channels = 1; channels < 16; channels++) { 2117 DWConvMicrokernelTester() 2118 .cr(16) 2119 .kr(25) 2120 .channels(channels) 2121 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mla8_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2122 } 2123 } 2124 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MLA8_LD64,c_gt_16)2125 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MLA8_LD64, c_gt_16) { 2126 TEST_REQUIRES_ARM_NEON; 2127 for (uint32_t channels = 17; channels < 32; channels++) { 2128 DWConvMicrokernelTester() 2129 .cr(16) 2130 .kr(25) 2131 .channels(channels) 2132 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mla8_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2133 } 2134 } 2135 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MLA8_LD64,c_gt_16_with_qmin)2136 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MLA8_LD64, c_gt_16_with_qmin) { 2137 TEST_REQUIRES_ARM_NEON; 2138 for (uint32_t 
channels = 17; channels < 32; channels++) { 2139 DWConvMicrokernelTester() 2140 .cr(16) 2141 .kr(25) 2142 .channels(channels) 2143 .qmin(128) 2144 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mla8_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2145 } 2146 } 2147 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MLA8_LD64,c_gt_16_with_qmax)2148 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MLA8_LD64, c_gt_16_with_qmax) { 2149 TEST_REQUIRES_ARM_NEON; 2150 for (uint32_t channels = 17; channels < 32; channels++) { 2151 DWConvMicrokernelTester() 2152 .cr(16) 2153 .kr(25) 2154 .channels(channels) 2155 .qmax(128) 2156 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mla8_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2157 } 2158 } 2159 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MLA8_LD64,multipixel)2160 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MLA8_LD64, multipixel) { 2161 TEST_REQUIRES_ARM_NEON; 2162 for (size_t channels = 1; channels <= 80; channels += 15) { 2163 DWConvMicrokernelTester() 2164 .cr(16) 2165 .kr(25) 2166 .channels(channels) 2167 .width(3) 2168 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mla8_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2169 } 2170 } 2171 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MLA8_LD64,multipixel_with_step)2172 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MLA8_LD64, multipixel_with_step) { 2173 TEST_REQUIRES_ARM_NEON; 2174 for (size_t channels = 1; channels <= 80; channels += 15) { 2175 for (size_t step = 2; step <= 25; step++) { 2176 DWConvMicrokernelTester() 2177 .cr(16) 2178 .kr(25) 2179 .channels(channels) 2180 .width(3) 2181 .step(step) 2182 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mla8_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2183 } 2184 } 2185 } 2186 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MLA8_LD64,multipixel_with_output_stride)2187 
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MLA8_LD64, multipixel_with_output_stride) { 2188 TEST_REQUIRES_ARM_NEON; 2189 for (size_t channels = 1; channels <= 80; channels += 15) { 2190 DWConvMicrokernelTester() 2191 .cr(16) 2192 .kr(25) 2193 .channels(16) 2194 .width(5) 2195 .output_stride(83) 2196 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mla8_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2197 } 2198 } 2199 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MLA8_LD64,multipixel_with_qmin)2200 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MLA8_LD64, multipixel_with_qmin) { 2201 TEST_REQUIRES_ARM_NEON; 2202 for (size_t channels = 1; channels <= 80; channels += 15) { 2203 DWConvMicrokernelTester() 2204 .cr(16) 2205 .kr(25) 2206 .channels(channels) 2207 .width(3) 2208 .qmin(128) 2209 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mla8_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2210 } 2211 } 2212 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MLA8_LD64,multipixel_with_qmax)2213 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MLA8_LD64, multipixel_with_qmax) { 2214 TEST_REQUIRES_ARM_NEON; 2215 for (size_t channels = 1; channels <= 80; channels += 15) { 2216 DWConvMicrokernelTester() 2217 .cr(16) 2218 .kr(25) 2219 .channels(channels) 2220 .width(3) 2221 .qmax(128) 2222 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mla8_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2223 } 2224 } 2225 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MLA8_LD64,input_offset)2226 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MLA8_LD64, input_offset) { 2227 TEST_REQUIRES_ARM_NEON; 2228 for (uint32_t channels = 32; channels < 256; channels += 48) { 2229 DWConvMicrokernelTester() 2230 .cr(16) 2231 .kr(25) 2232 .channels(channels) 2233 .input_offset(304) 2234 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mla8_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 
2235 } 2236 } 2237 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MLA8_LD64,zero)2238 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MLA8_LD64, zero) { 2239 TEST_REQUIRES_ARM_NEON; 2240 for (uint32_t mz = 0; mz < 25; mz++) { 2241 for (uint32_t channels = 32; channels < 256; channels += 48) { 2242 DWConvMicrokernelTester() 2243 .cr(16) 2244 .kr(25) 2245 .channels(channels) 2246 .input_offset(304) 2247 .zero_index(mz) 2248 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mla8_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2249 } 2250 } 2251 } 2252 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 2253 2254 2255 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MLA8_LD128,c_eq_16)2256 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MLA8_LD128, c_eq_16) { 2257 TEST_REQUIRES_ARM_NEON; 2258 DWConvMicrokernelTester() 2259 .cr(16) 2260 .kr(25) 2261 .channels(16) 2262 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mla8_ld128, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2263 } 2264 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MLA8_LD128,c_div_16)2265 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MLA8_LD128, c_div_16) { 2266 TEST_REQUIRES_ARM_NEON; 2267 for (uint32_t channels = 32; channels < 256; channels += 48) { 2268 DWConvMicrokernelTester() 2269 .cr(16) 2270 .kr(25) 2271 .channels(channels) 2272 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mla8_ld128, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2273 } 2274 } 2275 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MLA8_LD128,c_div_16_with_qmin)2276 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MLA8_LD128, c_div_16_with_qmin) { 2277 TEST_REQUIRES_ARM_NEON; 2278 for (uint32_t channels = 32; channels < 256; channels += 48) { 2279 DWConvMicrokernelTester() 2280 .cr(16) 2281 .kr(25) 2282 .channels(channels) 2283 .qmin(128) 2284 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mla8_ld128, 
xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2285 } 2286 } 2287 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MLA8_LD128,c_div_16_with_qmax)2288 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MLA8_LD128, c_div_16_with_qmax) { 2289 TEST_REQUIRES_ARM_NEON; 2290 for (uint32_t channels = 32; channels < 256; channels += 48) { 2291 DWConvMicrokernelTester() 2292 .cr(16) 2293 .kr(25) 2294 .channels(channels) 2295 .qmax(128) 2296 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mla8_ld128, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2297 } 2298 } 2299 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MLA8_LD128,c_lt_16)2300 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MLA8_LD128, c_lt_16) { 2301 TEST_REQUIRES_ARM_NEON; 2302 for (uint32_t channels = 1; channels < 16; channels++) { 2303 DWConvMicrokernelTester() 2304 .cr(16) 2305 .kr(25) 2306 .channels(channels) 2307 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mla8_ld128, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2308 } 2309 } 2310 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MLA8_LD128,c_gt_16)2311 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MLA8_LD128, c_gt_16) { 2312 TEST_REQUIRES_ARM_NEON; 2313 for (uint32_t channels = 17; channels < 32; channels++) { 2314 DWConvMicrokernelTester() 2315 .cr(16) 2316 .kr(25) 2317 .channels(channels) 2318 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mla8_ld128, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2319 } 2320 } 2321 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MLA8_LD128,c_gt_16_with_qmin)2322 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MLA8_LD128, c_gt_16_with_qmin) { 2323 TEST_REQUIRES_ARM_NEON; 2324 for (uint32_t channels = 17; channels < 32; channels++) { 2325 DWConvMicrokernelTester() 2326 .cr(16) 2327 .kr(25) 2328 .channels(channels) 2329 .qmin(128) 2330 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mla8_ld128, 
xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2331 } 2332 } 2333 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MLA8_LD128,c_gt_16_with_qmax)2334 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MLA8_LD128, c_gt_16_with_qmax) { 2335 TEST_REQUIRES_ARM_NEON; 2336 for (uint32_t channels = 17; channels < 32; channels++) { 2337 DWConvMicrokernelTester() 2338 .cr(16) 2339 .kr(25) 2340 .channels(channels) 2341 .qmax(128) 2342 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mla8_ld128, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2343 } 2344 } 2345 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MLA8_LD128,multipixel)2346 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MLA8_LD128, multipixel) { 2347 TEST_REQUIRES_ARM_NEON; 2348 for (size_t channels = 1; channels <= 80; channels += 15) { 2349 DWConvMicrokernelTester() 2350 .cr(16) 2351 .kr(25) 2352 .channels(channels) 2353 .width(3) 2354 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mla8_ld128, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2355 } 2356 } 2357 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MLA8_LD128,multipixel_with_step)2358 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MLA8_LD128, multipixel_with_step) { 2359 TEST_REQUIRES_ARM_NEON; 2360 for (size_t channels = 1; channels <= 80; channels += 15) { 2361 for (size_t step = 2; step <= 25; step++) { 2362 DWConvMicrokernelTester() 2363 .cr(16) 2364 .kr(25) 2365 .channels(channels) 2366 .width(3) 2367 .step(step) 2368 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mla8_ld128, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2369 } 2370 } 2371 } 2372 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MLA8_LD128,multipixel_with_output_stride)2373 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MLA8_LD128, multipixel_with_output_stride) { 2374 TEST_REQUIRES_ARM_NEON; 2375 for (size_t channels = 1; channels <= 80; channels += 15) { 2376 DWConvMicrokernelTester() 2377 .cr(16) 2378 
.kr(25) 2379 .channels(16) 2380 .width(5) 2381 .output_stride(83) 2382 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mla8_ld128, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2383 } 2384 } 2385 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MLA8_LD128,multipixel_with_qmin)2386 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MLA8_LD128, multipixel_with_qmin) { 2387 TEST_REQUIRES_ARM_NEON; 2388 for (size_t channels = 1; channels <= 80; channels += 15) { 2389 DWConvMicrokernelTester() 2390 .cr(16) 2391 .kr(25) 2392 .channels(channels) 2393 .width(3) 2394 .qmin(128) 2395 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mla8_ld128, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2396 } 2397 } 2398 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MLA8_LD128,multipixel_with_qmax)2399 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MLA8_LD128, multipixel_with_qmax) { 2400 TEST_REQUIRES_ARM_NEON; 2401 for (size_t channels = 1; channels <= 80; channels += 15) { 2402 DWConvMicrokernelTester() 2403 .cr(16) 2404 .kr(25) 2405 .channels(channels) 2406 .width(3) 2407 .qmax(128) 2408 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mla8_ld128, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2409 } 2410 } 2411 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MLA8_LD128,input_offset)2412 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MLA8_LD128, input_offset) { 2413 TEST_REQUIRES_ARM_NEON; 2414 for (uint32_t channels = 32; channels < 256; channels += 48) { 2415 DWConvMicrokernelTester() 2416 .cr(16) 2417 .kr(25) 2418 .channels(channels) 2419 .input_offset(304) 2420 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mla8_ld128, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2421 } 2422 } 2423 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MLA8_LD128,zero)2424 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MLA8_LD128, zero) { 2425 TEST_REQUIRES_ARM_NEON; 2426 for (uint32_t mz = 0; mz < 25; mz++) { 
2427 for (uint32_t channels = 32; channels < 256; channels += 48) { 2428 DWConvMicrokernelTester() 2429 .cr(16) 2430 .kr(25) 2431 .channels(channels) 2432 .input_offset(304) 2433 .zero_index(mz) 2434 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mla8_ld128, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2435 } 2436 } 2437 } 2438 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 2439 2440 2441 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8_LD64,c_eq_16)2442 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8_LD64, c_eq_16) { 2443 TEST_REQUIRES_ARM_NEON; 2444 DWConvMicrokernelTester() 2445 .cr(16) 2446 .kr(25) 2447 .channels(16) 2448 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul8_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2449 } 2450 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8_LD64,c_div_16)2451 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8_LD64, c_div_16) { 2452 TEST_REQUIRES_ARM_NEON; 2453 for (uint32_t channels = 32; channels < 256; channels += 48) { 2454 DWConvMicrokernelTester() 2455 .cr(16) 2456 .kr(25) 2457 .channels(channels) 2458 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul8_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2459 } 2460 } 2461 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8_LD64,c_div_16_with_qmin)2462 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8_LD64, c_div_16_with_qmin) { 2463 TEST_REQUIRES_ARM_NEON; 2464 for (uint32_t channels = 32; channels < 256; channels += 48) { 2465 DWConvMicrokernelTester() 2466 .cr(16) 2467 .kr(25) 2468 .channels(channels) 2469 .qmin(128) 2470 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul8_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2471 } 2472 } 2473 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8_LD64,c_div_16_with_qmax)2474 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8_LD64, c_div_16_with_qmax) { 2475 
TEST_REQUIRES_ARM_NEON; 2476 for (uint32_t channels = 32; channels < 256; channels += 48) { 2477 DWConvMicrokernelTester() 2478 .cr(16) 2479 .kr(25) 2480 .channels(channels) 2481 .qmax(128) 2482 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul8_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2483 } 2484 } 2485 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8_LD64,c_lt_16)2486 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8_LD64, c_lt_16) { 2487 TEST_REQUIRES_ARM_NEON; 2488 for (uint32_t channels = 1; channels < 16; channels++) { 2489 DWConvMicrokernelTester() 2490 .cr(16) 2491 .kr(25) 2492 .channels(channels) 2493 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul8_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2494 } 2495 } 2496 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8_LD64,c_gt_16)2497 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8_LD64, c_gt_16) { 2498 TEST_REQUIRES_ARM_NEON; 2499 for (uint32_t channels = 17; channels < 32; channels++) { 2500 DWConvMicrokernelTester() 2501 .cr(16) 2502 .kr(25) 2503 .channels(channels) 2504 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul8_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2505 } 2506 } 2507 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8_LD64,c_gt_16_with_qmin)2508 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8_LD64, c_gt_16_with_qmin) { 2509 TEST_REQUIRES_ARM_NEON; 2510 for (uint32_t channels = 17; channels < 32; channels++) { 2511 DWConvMicrokernelTester() 2512 .cr(16) 2513 .kr(25) 2514 .channels(channels) 2515 .qmin(128) 2516 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul8_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2517 } 2518 } 2519 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8_LD64,c_gt_16_with_qmax)2520 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8_LD64, c_gt_16_with_qmax) { 2521 TEST_REQUIRES_ARM_NEON; 2522 for (uint32_t channels = 
17; channels < 32; channels++) { 2523 DWConvMicrokernelTester() 2524 .cr(16) 2525 .kr(25) 2526 .channels(channels) 2527 .qmax(128) 2528 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul8_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2529 } 2530 } 2531 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8_LD64,multipixel)2532 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8_LD64, multipixel) { 2533 TEST_REQUIRES_ARM_NEON; 2534 for (size_t channels = 1; channels <= 80; channels += 15) { 2535 DWConvMicrokernelTester() 2536 .cr(16) 2537 .kr(25) 2538 .channels(channels) 2539 .width(3) 2540 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul8_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2541 } 2542 } 2543 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8_LD64,multipixel_with_step)2544 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8_LD64, multipixel_with_step) { 2545 TEST_REQUIRES_ARM_NEON; 2546 for (size_t channels = 1; channels <= 80; channels += 15) { 2547 for (size_t step = 2; step <= 25; step++) { 2548 DWConvMicrokernelTester() 2549 .cr(16) 2550 .kr(25) 2551 .channels(channels) 2552 .width(3) 2553 .step(step) 2554 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul8_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2555 } 2556 } 2557 } 2558 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8_LD64,multipixel_with_output_stride)2559 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8_LD64, multipixel_with_output_stride) { 2560 TEST_REQUIRES_ARM_NEON; 2561 for (size_t channels = 1; channels <= 80; channels += 15) { 2562 DWConvMicrokernelTester() 2563 .cr(16) 2564 .kr(25) 2565 .channels(16) 2566 .width(5) 2567 .output_stride(83) 2568 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul8_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2569 } 2570 } 2571 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8_LD64,multipixel_with_qmin)2572 
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8_LD64, multipixel_with_qmin) { 2573 TEST_REQUIRES_ARM_NEON; 2574 for (size_t channels = 1; channels <= 80; channels += 15) { 2575 DWConvMicrokernelTester() 2576 .cr(16) 2577 .kr(25) 2578 .channels(channels) 2579 .width(3) 2580 .qmin(128) 2581 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul8_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2582 } 2583 } 2584 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8_LD64,multipixel_with_qmax)2585 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8_LD64, multipixel_with_qmax) { 2586 TEST_REQUIRES_ARM_NEON; 2587 for (size_t channels = 1; channels <= 80; channels += 15) { 2588 DWConvMicrokernelTester() 2589 .cr(16) 2590 .kr(25) 2591 .channels(channels) 2592 .width(3) 2593 .qmax(128) 2594 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul8_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2595 } 2596 } 2597 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8_LD64,input_offset)2598 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8_LD64, input_offset) { 2599 TEST_REQUIRES_ARM_NEON; 2600 for (uint32_t channels = 32; channels < 256; channels += 48) { 2601 DWConvMicrokernelTester() 2602 .cr(16) 2603 .kr(25) 2604 .channels(channels) 2605 .input_offset(304) 2606 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul8_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2607 } 2608 } 2609 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8_LD64,zero)2610 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8_LD64, zero) { 2611 TEST_REQUIRES_ARM_NEON; 2612 for (uint32_t mz = 0; mz < 25; mz++) { 2613 for (uint32_t channels = 32; channels < 256; channels += 48) { 2614 DWConvMicrokernelTester() 2615 .cr(16) 2616 .kr(25) 2617 .channels(channels) 2618 .input_offset(304) 2619 .zero_index(mz) 2620 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul8_ld64, xnn_init_qs8_conv_minmax_rndnu_neon_params, 
xnn_qs8_requantize_rndnu); 2621 } 2622 } 2623 } 2624 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 2625 2626 2627 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8_LD128,c_eq_16)2628 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8_LD128, c_eq_16) { 2629 TEST_REQUIRES_ARM_NEON; 2630 DWConvMicrokernelTester() 2631 .cr(16) 2632 .kr(25) 2633 .channels(16) 2634 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul8_ld128, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2635 } 2636 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8_LD128,c_div_16)2637 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8_LD128, c_div_16) { 2638 TEST_REQUIRES_ARM_NEON; 2639 for (uint32_t channels = 32; channels < 256; channels += 48) { 2640 DWConvMicrokernelTester() 2641 .cr(16) 2642 .kr(25) 2643 .channels(channels) 2644 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul8_ld128, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2645 } 2646 } 2647 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8_LD128,c_div_16_with_qmin)2648 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8_LD128, c_div_16_with_qmin) { 2649 TEST_REQUIRES_ARM_NEON; 2650 for (uint32_t channels = 32; channels < 256; channels += 48) { 2651 DWConvMicrokernelTester() 2652 .cr(16) 2653 .kr(25) 2654 .channels(channels) 2655 .qmin(128) 2656 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul8_ld128, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2657 } 2658 } 2659 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8_LD128,c_div_16_with_qmax)2660 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8_LD128, c_div_16_with_qmax) { 2661 TEST_REQUIRES_ARM_NEON; 2662 for (uint32_t channels = 32; channels < 256; channels += 48) { 2663 DWConvMicrokernelTester() 2664 .cr(16) 2665 .kr(25) 2666 .channels(channels) 2667 .qmax(128) 2668 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul8_ld128, xnn_init_qs8_conv_minmax_rndnu_neon_params, 
xnn_qs8_requantize_rndnu); 2669 } 2670 } 2671 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8_LD128,c_lt_16)2672 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8_LD128, c_lt_16) { 2673 TEST_REQUIRES_ARM_NEON; 2674 for (uint32_t channels = 1; channels < 16; channels++) { 2675 DWConvMicrokernelTester() 2676 .cr(16) 2677 .kr(25) 2678 .channels(channels) 2679 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul8_ld128, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2680 } 2681 } 2682 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8_LD128,c_gt_16)2683 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8_LD128, c_gt_16) { 2684 TEST_REQUIRES_ARM_NEON; 2685 for (uint32_t channels = 17; channels < 32; channels++) { 2686 DWConvMicrokernelTester() 2687 .cr(16) 2688 .kr(25) 2689 .channels(channels) 2690 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul8_ld128, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2691 } 2692 } 2693 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8_LD128,c_gt_16_with_qmin)2694 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8_LD128, c_gt_16_with_qmin) { 2695 TEST_REQUIRES_ARM_NEON; 2696 for (uint32_t channels = 17; channels < 32; channels++) { 2697 DWConvMicrokernelTester() 2698 .cr(16) 2699 .kr(25) 2700 .channels(channels) 2701 .qmin(128) 2702 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul8_ld128, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2703 } 2704 } 2705 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8_LD128,c_gt_16_with_qmax)2706 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8_LD128, c_gt_16_with_qmax) { 2707 TEST_REQUIRES_ARM_NEON; 2708 for (uint32_t channels = 17; channels < 32; channels++) { 2709 DWConvMicrokernelTester() 2710 .cr(16) 2711 .kr(25) 2712 .channels(channels) 2713 .qmax(128) 2714 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul8_ld128, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2715 } 2716 } 2717 
TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8_LD128,multipixel)2718 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8_LD128, multipixel) { 2719 TEST_REQUIRES_ARM_NEON; 2720 for (size_t channels = 1; channels <= 80; channels += 15) { 2721 DWConvMicrokernelTester() 2722 .cr(16) 2723 .kr(25) 2724 .channels(channels) 2725 .width(3) 2726 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul8_ld128, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2727 } 2728 } 2729 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8_LD128,multipixel_with_step)2730 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8_LD128, multipixel_with_step) { 2731 TEST_REQUIRES_ARM_NEON; 2732 for (size_t channels = 1; channels <= 80; channels += 15) { 2733 for (size_t step = 2; step <= 25; step++) { 2734 DWConvMicrokernelTester() 2735 .cr(16) 2736 .kr(25) 2737 .channels(channels) 2738 .width(3) 2739 .step(step) 2740 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul8_ld128, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2741 } 2742 } 2743 } 2744 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8_LD128,multipixel_with_output_stride)2745 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8_LD128, multipixel_with_output_stride) { 2746 TEST_REQUIRES_ARM_NEON; 2747 for (size_t channels = 1; channels <= 80; channels += 15) { 2748 DWConvMicrokernelTester() 2749 .cr(16) 2750 .kr(25) 2751 .channels(16) 2752 .width(5) 2753 .output_stride(83) 2754 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul8_ld128, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2755 } 2756 } 2757 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8_LD128,multipixel_with_qmin)2758 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8_LD128, multipixel_with_qmin) { 2759 TEST_REQUIRES_ARM_NEON; 2760 for (size_t channels = 1; channels <= 80; channels += 15) { 2761 DWConvMicrokernelTester() 2762 .cr(16) 2763 .kr(25) 2764 .channels(channels) 2765 .width(3) 2766 .qmin(128) 2767 
.Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul8_ld128, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2768 } 2769 } 2770 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8_LD128,multipixel_with_qmax)2771 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8_LD128, multipixel_with_qmax) { 2772 TEST_REQUIRES_ARM_NEON; 2773 for (size_t channels = 1; channels <= 80; channels += 15) { 2774 DWConvMicrokernelTester() 2775 .cr(16) 2776 .kr(25) 2777 .channels(channels) 2778 .width(3) 2779 .qmax(128) 2780 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul8_ld128, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2781 } 2782 } 2783 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8_LD128,input_offset)2784 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8_LD128, input_offset) { 2785 TEST_REQUIRES_ARM_NEON; 2786 for (uint32_t channels = 32; channels < 256; channels += 48) { 2787 DWConvMicrokernelTester() 2788 .cr(16) 2789 .kr(25) 2790 .channels(channels) 2791 .input_offset(304) 2792 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul8_ld128, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2793 } 2794 } 2795 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8_LD128,zero)2796 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL8_LD128, zero) { 2797 TEST_REQUIRES_ARM_NEON; 2798 for (uint32_t mz = 0; mz < 25; mz++) { 2799 for (uint32_t channels = 32; channels < 256; channels += 48) { 2800 DWConvMicrokernelTester() 2801 .cr(16) 2802 .kr(25) 2803 .channels(channels) 2804 .input_offset(304) 2805 .zero_index(mz) 2806 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul8_ld128, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2807 } 2808 } 2809 } 2810 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 2811 2812 2813 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL16,c_eq_16)2814 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL16, c_eq_16) { 2815 
TEST_REQUIRES_ARM_NEON; 2816 DWConvMicrokernelTester() 2817 .cr(16) 2818 .kr(25) 2819 .channels(16) 2820 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul16, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2821 } 2822 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL16,c_div_16)2823 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL16, c_div_16) { 2824 TEST_REQUIRES_ARM_NEON; 2825 for (uint32_t channels = 32; channels < 256; channels += 48) { 2826 DWConvMicrokernelTester() 2827 .cr(16) 2828 .kr(25) 2829 .channels(channels) 2830 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul16, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2831 } 2832 } 2833 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL16,c_div_16_with_qmin)2834 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL16, c_div_16_with_qmin) { 2835 TEST_REQUIRES_ARM_NEON; 2836 for (uint32_t channels = 32; channels < 256; channels += 48) { 2837 DWConvMicrokernelTester() 2838 .cr(16) 2839 .kr(25) 2840 .channels(channels) 2841 .qmin(128) 2842 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul16, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2843 } 2844 } 2845 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL16,c_div_16_with_qmax)2846 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL16, c_div_16_with_qmax) { 2847 TEST_REQUIRES_ARM_NEON; 2848 for (uint32_t channels = 32; channels < 256; channels += 48) { 2849 DWConvMicrokernelTester() 2850 .cr(16) 2851 .kr(25) 2852 .channels(channels) 2853 .qmax(128) 2854 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul16, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2855 } 2856 } 2857 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL16,c_lt_16)2858 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL16, c_lt_16) { 2859 TEST_REQUIRES_ARM_NEON; 2860 for (uint32_t channels = 1; channels < 16; channels++) { 2861 DWConvMicrokernelTester() 2862 .cr(16) 2863 .kr(25) 2864 
.channels(channels) 2865 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul16, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2866 } 2867 } 2868 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL16,c_gt_16)2869 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL16, c_gt_16) { 2870 TEST_REQUIRES_ARM_NEON; 2871 for (uint32_t channels = 17; channels < 32; channels++) { 2872 DWConvMicrokernelTester() 2873 .cr(16) 2874 .kr(25) 2875 .channels(channels) 2876 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul16, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2877 } 2878 } 2879 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL16,c_gt_16_with_qmin)2880 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL16, c_gt_16_with_qmin) { 2881 TEST_REQUIRES_ARM_NEON; 2882 for (uint32_t channels = 17; channels < 32; channels++) { 2883 DWConvMicrokernelTester() 2884 .cr(16) 2885 .kr(25) 2886 .channels(channels) 2887 .qmin(128) 2888 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul16, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2889 } 2890 } 2891 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL16,c_gt_16_with_qmax)2892 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL16, c_gt_16_with_qmax) { 2893 TEST_REQUIRES_ARM_NEON; 2894 for (uint32_t channels = 17; channels < 32; channels++) { 2895 DWConvMicrokernelTester() 2896 .cr(16) 2897 .kr(25) 2898 .channels(channels) 2899 .qmax(128) 2900 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul16, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2901 } 2902 } 2903 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL16,multipixel)2904 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL16, multipixel) { 2905 TEST_REQUIRES_ARM_NEON; 2906 for (size_t channels = 1; channels <= 80; channels += 15) { 2907 DWConvMicrokernelTester() 2908 .cr(16) 2909 .kr(25) 2910 .channels(channels) 2911 .width(3) 2912 
.Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul16, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2913 } 2914 } 2915 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL16,multipixel_with_step)2916 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL16, multipixel_with_step) { 2917 TEST_REQUIRES_ARM_NEON; 2918 for (size_t channels = 1; channels <= 80; channels += 15) { 2919 for (size_t step = 2; step <= 25; step++) { 2920 DWConvMicrokernelTester() 2921 .cr(16) 2922 .kr(25) 2923 .channels(channels) 2924 .width(3) 2925 .step(step) 2926 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul16, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2927 } 2928 } 2929 } 2930 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL16,multipixel_with_output_stride)2931 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL16, multipixel_with_output_stride) { 2932 TEST_REQUIRES_ARM_NEON; 2933 for (size_t channels = 1; channels <= 80; channels += 15) { 2934 DWConvMicrokernelTester() 2935 .cr(16) 2936 .kr(25) 2937 .channels(16) 2938 .width(5) 2939 .output_stride(83) 2940 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul16, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2941 } 2942 } 2943 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL16,multipixel_with_qmin)2944 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL16, multipixel_with_qmin) { 2945 TEST_REQUIRES_ARM_NEON; 2946 for (size_t channels = 1; channels <= 80; channels += 15) { 2947 DWConvMicrokernelTester() 2948 .cr(16) 2949 .kr(25) 2950 .channels(channels) 2951 .width(3) 2952 .qmin(128) 2953 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul16, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2954 } 2955 } 2956 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL16,multipixel_with_qmax)2957 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL16, multipixel_with_qmax) { 2958 TEST_REQUIRES_ARM_NEON; 2959 for (size_t channels = 1; channels <= 80; 
channels += 15) { 2960 DWConvMicrokernelTester() 2961 .cr(16) 2962 .kr(25) 2963 .channels(channels) 2964 .width(3) 2965 .qmax(128) 2966 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul16, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2967 } 2968 } 2969 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL16,input_offset)2970 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL16, input_offset) { 2971 TEST_REQUIRES_ARM_NEON; 2972 for (uint32_t channels = 32; channels < 256; channels += 48) { 2973 DWConvMicrokernelTester() 2974 .cr(16) 2975 .kr(25) 2976 .channels(channels) 2977 .input_offset(304) 2978 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul16, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2979 } 2980 } 2981 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL16,zero)2982 TEST(QS8_DWCONV_MINMAX_RNDNU_UP16X25__NEON_MUL16, zero) { 2983 TEST_REQUIRES_ARM_NEON; 2984 for (uint32_t mz = 0; mz < 25; mz++) { 2985 for (uint32_t channels = 32; channels < 256; channels += 48) { 2986 DWConvMicrokernelTester() 2987 .cr(16) 2988 .kr(25) 2989 .channels(channels) 2990 .input_offset(304) 2991 .zero_index(mz) 2992 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up16x25__neon_mul16, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 2993 } 2994 } 2995 } 2996 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 2997 2998 2999 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL16,c_eq_24)3000 TEST(QS8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL16, c_eq_24) { 3001 TEST_REQUIRES_ARM_NEON; 3002 DWConvMicrokernelTester() 3003 .cr(24) 3004 .kr(9) 3005 .channels(24) 3006 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up24x9__neon_mul16, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3007 } 3008 TEST(QS8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL16,c_div_24)3009 TEST(QS8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL16, c_div_24) { 3010 TEST_REQUIRES_ARM_NEON; 3011 for (uint32_t channels = 48; channels < 
384; channels += 72) { 3012 DWConvMicrokernelTester() 3013 .cr(24) 3014 .kr(9) 3015 .channels(channels) 3016 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up24x9__neon_mul16, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3017 } 3018 } 3019 TEST(QS8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL16,c_div_24_with_qmin)3020 TEST(QS8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL16, c_div_24_with_qmin) { 3021 TEST_REQUIRES_ARM_NEON; 3022 for (uint32_t channels = 48; channels < 384; channels += 72) { 3023 DWConvMicrokernelTester() 3024 .cr(24) 3025 .kr(9) 3026 .channels(channels) 3027 .qmin(128) 3028 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up24x9__neon_mul16, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3029 } 3030 } 3031 TEST(QS8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL16,c_div_24_with_qmax)3032 TEST(QS8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL16, c_div_24_with_qmax) { 3033 TEST_REQUIRES_ARM_NEON; 3034 for (uint32_t channels = 48; channels < 384; channels += 72) { 3035 DWConvMicrokernelTester() 3036 .cr(24) 3037 .kr(9) 3038 .channels(channels) 3039 .qmax(128) 3040 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up24x9__neon_mul16, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3041 } 3042 } 3043 TEST(QS8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL16,c_lt_24)3044 TEST(QS8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL16, c_lt_24) { 3045 TEST_REQUIRES_ARM_NEON; 3046 for (uint32_t channels = 1; channels < 24; channels++) { 3047 DWConvMicrokernelTester() 3048 .cr(24) 3049 .kr(9) 3050 .channels(channels) 3051 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up24x9__neon_mul16, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3052 } 3053 } 3054 TEST(QS8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL16,c_gt_24)3055 TEST(QS8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL16, c_gt_24) { 3056 TEST_REQUIRES_ARM_NEON; 3057 for (uint32_t channels = 25; channels < 48; channels++) { 3058 DWConvMicrokernelTester() 3059 .cr(24) 3060 .kr(9) 3061 .channels(channels) 3062 
.Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up24x9__neon_mul16, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3063 } 3064 } 3065 TEST(QS8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL16,c_gt_24_with_qmin)3066 TEST(QS8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL16, c_gt_24_with_qmin) { 3067 TEST_REQUIRES_ARM_NEON; 3068 for (uint32_t channels = 25; channels < 48; channels++) { 3069 DWConvMicrokernelTester() 3070 .cr(24) 3071 .kr(9) 3072 .channels(channels) 3073 .qmin(128) 3074 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up24x9__neon_mul16, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3075 } 3076 } 3077 TEST(QS8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL16,c_gt_24_with_qmax)3078 TEST(QS8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL16, c_gt_24_with_qmax) { 3079 TEST_REQUIRES_ARM_NEON; 3080 for (uint32_t channels = 25; channels < 48; channels++) { 3081 DWConvMicrokernelTester() 3082 .cr(24) 3083 .kr(9) 3084 .channels(channels) 3085 .qmax(128) 3086 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up24x9__neon_mul16, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3087 } 3088 } 3089 TEST(QS8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL16,multipixel)3090 TEST(QS8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL16, multipixel) { 3091 TEST_REQUIRES_ARM_NEON; 3092 for (size_t channels = 1; channels <= 120; channels += 23) { 3093 DWConvMicrokernelTester() 3094 .cr(24) 3095 .kr(9) 3096 .channels(channels) 3097 .width(3) 3098 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up24x9__neon_mul16, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3099 } 3100 } 3101 TEST(QS8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL16,multipixel_with_step)3102 TEST(QS8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL16, multipixel_with_step) { 3103 TEST_REQUIRES_ARM_NEON; 3104 for (size_t channels = 1; channels <= 120; channels += 23) { 3105 for (size_t step = 2; step <= 9; step++) { 3106 DWConvMicrokernelTester() 3107 .cr(24) 3108 .kr(9) 3109 .channels(channels) 3110 .width(3) 3111 
.step(step) 3112 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up24x9__neon_mul16, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3113 } 3114 } 3115 } 3116 TEST(QS8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL16,multipixel_with_output_stride)3117 TEST(QS8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL16, multipixel_with_output_stride) { 3118 TEST_REQUIRES_ARM_NEON; 3119 for (size_t channels = 1; channels <= 120; channels += 23) { 3120 DWConvMicrokernelTester() 3121 .cr(24) 3122 .kr(9) 3123 .channels(24) 3124 .width(5) 3125 .output_stride(127) 3126 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up24x9__neon_mul16, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3127 } 3128 } 3129 TEST(QS8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL16,multipixel_with_qmin)3130 TEST(QS8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL16, multipixel_with_qmin) { 3131 TEST_REQUIRES_ARM_NEON; 3132 for (size_t channels = 1; channels <= 120; channels += 23) { 3133 DWConvMicrokernelTester() 3134 .cr(24) 3135 .kr(9) 3136 .channels(channels) 3137 .width(3) 3138 .qmin(128) 3139 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up24x9__neon_mul16, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3140 } 3141 } 3142 TEST(QS8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL16,multipixel_with_qmax)3143 TEST(QS8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL16, multipixel_with_qmax) { 3144 TEST_REQUIRES_ARM_NEON; 3145 for (size_t channels = 1; channels <= 120; channels += 23) { 3146 DWConvMicrokernelTester() 3147 .cr(24) 3148 .kr(9) 3149 .channels(channels) 3150 .width(3) 3151 .qmax(128) 3152 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up24x9__neon_mul16, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3153 } 3154 } 3155 TEST(QS8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL16,input_offset)3156 TEST(QS8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL16, input_offset) { 3157 TEST_REQUIRES_ARM_NEON; 3158 for (uint32_t channels = 48; channels < 384; channels += 72) { 3159 DWConvMicrokernelTester() 3160 
.cr(24) 3161 .kr(9) 3162 .channels(channels) 3163 .input_offset(464) 3164 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up24x9__neon_mul16, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3165 } 3166 } 3167 TEST(QS8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL16,zero)3168 TEST(QS8_DWCONV_MINMAX_RNDNU_UP24X9__NEON_MUL16, zero) { 3169 TEST_REQUIRES_ARM_NEON; 3170 for (uint32_t mz = 0; mz < 9; mz++) { 3171 for (uint32_t channels = 48; channels < 384; channels += 72) { 3172 DWConvMicrokernelTester() 3173 .cr(24) 3174 .kr(9) 3175 .channels(channels) 3176 .input_offset(464) 3177 .zero_index(mz) 3178 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up24x9__neon_mul16, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3179 } 3180 } 3181 } 3182 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 3183 3184 3185 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_DWCONV_MINMAX_RNDNU_UP24X25__NEON_MUL16,c_eq_24)3186 TEST(QS8_DWCONV_MINMAX_RNDNU_UP24X25__NEON_MUL16, c_eq_24) { 3187 TEST_REQUIRES_ARM_NEON; 3188 DWConvMicrokernelTester() 3189 .cr(24) 3190 .kr(25) 3191 .channels(24) 3192 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up24x25__neon_mul16, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3193 } 3194 TEST(QS8_DWCONV_MINMAX_RNDNU_UP24X25__NEON_MUL16,c_div_24)3195 TEST(QS8_DWCONV_MINMAX_RNDNU_UP24X25__NEON_MUL16, c_div_24) { 3196 TEST_REQUIRES_ARM_NEON; 3197 for (uint32_t channels = 48; channels < 384; channels += 72) { 3198 DWConvMicrokernelTester() 3199 .cr(24) 3200 .kr(25) 3201 .channels(channels) 3202 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up24x25__neon_mul16, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3203 } 3204 } 3205 TEST(QS8_DWCONV_MINMAX_RNDNU_UP24X25__NEON_MUL16,c_div_24_with_qmin)3206 TEST(QS8_DWCONV_MINMAX_RNDNU_UP24X25__NEON_MUL16, c_div_24_with_qmin) { 3207 TEST_REQUIRES_ARM_NEON; 3208 for (uint32_t channels = 48; channels < 384; channels += 72) { 3209 DWConvMicrokernelTester() 3210 .cr(24) 3211 
.kr(25) 3212 .channels(channels) 3213 .qmin(128) 3214 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up24x25__neon_mul16, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3215 } 3216 } 3217 TEST(QS8_DWCONV_MINMAX_RNDNU_UP24X25__NEON_MUL16,c_div_24_with_qmax)3218 TEST(QS8_DWCONV_MINMAX_RNDNU_UP24X25__NEON_MUL16, c_div_24_with_qmax) { 3219 TEST_REQUIRES_ARM_NEON; 3220 for (uint32_t channels = 48; channels < 384; channels += 72) { 3221 DWConvMicrokernelTester() 3222 .cr(24) 3223 .kr(25) 3224 .channels(channels) 3225 .qmax(128) 3226 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up24x25__neon_mul16, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3227 } 3228 } 3229 TEST(QS8_DWCONV_MINMAX_RNDNU_UP24X25__NEON_MUL16,c_lt_24)3230 TEST(QS8_DWCONV_MINMAX_RNDNU_UP24X25__NEON_MUL16, c_lt_24) { 3231 TEST_REQUIRES_ARM_NEON; 3232 for (uint32_t channels = 1; channels < 24; channels++) { 3233 DWConvMicrokernelTester() 3234 .cr(24) 3235 .kr(25) 3236 .channels(channels) 3237 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up24x25__neon_mul16, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3238 } 3239 } 3240 TEST(QS8_DWCONV_MINMAX_RNDNU_UP24X25__NEON_MUL16,c_gt_24)3241 TEST(QS8_DWCONV_MINMAX_RNDNU_UP24X25__NEON_MUL16, c_gt_24) { 3242 TEST_REQUIRES_ARM_NEON; 3243 for (uint32_t channels = 25; channels < 48; channels++) { 3244 DWConvMicrokernelTester() 3245 .cr(24) 3246 .kr(25) 3247 .channels(channels) 3248 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up24x25__neon_mul16, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3249 } 3250 } 3251 TEST(QS8_DWCONV_MINMAX_RNDNU_UP24X25__NEON_MUL16,c_gt_24_with_qmin)3252 TEST(QS8_DWCONV_MINMAX_RNDNU_UP24X25__NEON_MUL16, c_gt_24_with_qmin) { 3253 TEST_REQUIRES_ARM_NEON; 3254 for (uint32_t channels = 25; channels < 48; channels++) { 3255 DWConvMicrokernelTester() 3256 .cr(24) 3257 .kr(25) 3258 .channels(channels) 3259 .qmin(128) 3260 
.Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up24x25__neon_mul16, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3261 } 3262 } 3263 TEST(QS8_DWCONV_MINMAX_RNDNU_UP24X25__NEON_MUL16,c_gt_24_with_qmax)3264 TEST(QS8_DWCONV_MINMAX_RNDNU_UP24X25__NEON_MUL16, c_gt_24_with_qmax) { 3265 TEST_REQUIRES_ARM_NEON; 3266 for (uint32_t channels = 25; channels < 48; channels++) { 3267 DWConvMicrokernelTester() 3268 .cr(24) 3269 .kr(25) 3270 .channels(channels) 3271 .qmax(128) 3272 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up24x25__neon_mul16, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3273 } 3274 } 3275 TEST(QS8_DWCONV_MINMAX_RNDNU_UP24X25__NEON_MUL16,multipixel)3276 TEST(QS8_DWCONV_MINMAX_RNDNU_UP24X25__NEON_MUL16, multipixel) { 3277 TEST_REQUIRES_ARM_NEON; 3278 for (size_t channels = 1; channels <= 120; channels += 23) { 3279 DWConvMicrokernelTester() 3280 .cr(24) 3281 .kr(25) 3282 .channels(channels) 3283 .width(3) 3284 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up24x25__neon_mul16, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3285 } 3286 } 3287 TEST(QS8_DWCONV_MINMAX_RNDNU_UP24X25__NEON_MUL16,multipixel_with_step)3288 TEST(QS8_DWCONV_MINMAX_RNDNU_UP24X25__NEON_MUL16, multipixel_with_step) { 3289 TEST_REQUIRES_ARM_NEON; 3290 for (size_t channels = 1; channels <= 120; channels += 23) { 3291 for (size_t step = 2; step <= 25; step++) { 3292 DWConvMicrokernelTester() 3293 .cr(24) 3294 .kr(25) 3295 .channels(channels) 3296 .width(3) 3297 .step(step) 3298 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up24x25__neon_mul16, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3299 } 3300 } 3301 } 3302 TEST(QS8_DWCONV_MINMAX_RNDNU_UP24X25__NEON_MUL16,multipixel_with_output_stride)3303 TEST(QS8_DWCONV_MINMAX_RNDNU_UP24X25__NEON_MUL16, multipixel_with_output_stride) { 3304 TEST_REQUIRES_ARM_NEON; 3305 for (size_t channels = 1; channels <= 120; channels += 23) { 3306 DWConvMicrokernelTester() 3307 
.cr(24) 3308 .kr(25) 3309 .channels(24) 3310 .width(5) 3311 .output_stride(127) 3312 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up24x25__neon_mul16, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3313 } 3314 } 3315 TEST(QS8_DWCONV_MINMAX_RNDNU_UP24X25__NEON_MUL16,multipixel_with_qmin)3316 TEST(QS8_DWCONV_MINMAX_RNDNU_UP24X25__NEON_MUL16, multipixel_with_qmin) { 3317 TEST_REQUIRES_ARM_NEON; 3318 for (size_t channels = 1; channels <= 120; channels += 23) { 3319 DWConvMicrokernelTester() 3320 .cr(24) 3321 .kr(25) 3322 .channels(channels) 3323 .width(3) 3324 .qmin(128) 3325 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up24x25__neon_mul16, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3326 } 3327 } 3328 TEST(QS8_DWCONV_MINMAX_RNDNU_UP24X25__NEON_MUL16,multipixel_with_qmax)3329 TEST(QS8_DWCONV_MINMAX_RNDNU_UP24X25__NEON_MUL16, multipixel_with_qmax) { 3330 TEST_REQUIRES_ARM_NEON; 3331 for (size_t channels = 1; channels <= 120; channels += 23) { 3332 DWConvMicrokernelTester() 3333 .cr(24) 3334 .kr(25) 3335 .channels(channels) 3336 .width(3) 3337 .qmax(128) 3338 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up24x25__neon_mul16, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3339 } 3340 } 3341 TEST(QS8_DWCONV_MINMAX_RNDNU_UP24X25__NEON_MUL16,input_offset)3342 TEST(QS8_DWCONV_MINMAX_RNDNU_UP24X25__NEON_MUL16, input_offset) { 3343 TEST_REQUIRES_ARM_NEON; 3344 for (uint32_t channels = 48; channels < 384; channels += 72) { 3345 DWConvMicrokernelTester() 3346 .cr(24) 3347 .kr(25) 3348 .channels(channels) 3349 .input_offset(464) 3350 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up24x25__neon_mul16, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3351 } 3352 } 3353 TEST(QS8_DWCONV_MINMAX_RNDNU_UP24X25__NEON_MUL16,zero)3354 TEST(QS8_DWCONV_MINMAX_RNDNU_UP24X25__NEON_MUL16, zero) { 3355 TEST_REQUIRES_ARM_NEON; 3356 for (uint32_t mz = 0; mz < 25; mz++) { 3357 for (uint32_t channels = 48; channels < 
384; channels += 72) { 3358 DWConvMicrokernelTester() 3359 .cr(24) 3360 .kr(25) 3361 .channels(channels) 3362 .input_offset(464) 3363 .zero_index(mz) 3364 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up24x25__neon_mul16, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3365 } 3366 } 3367 } 3368 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 3369 3370 3371 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL16,c_eq_32)3372 TEST(QS8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL16, c_eq_32) { 3373 TEST_REQUIRES_ARM_NEON; 3374 DWConvMicrokernelTester() 3375 .cr(32) 3376 .kr(9) 3377 .channels(32) 3378 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul16, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3379 } 3380 TEST(QS8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL16,c_div_32)3381 TEST(QS8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL16, c_div_32) { 3382 TEST_REQUIRES_ARM_NEON; 3383 for (uint32_t channels = 64; channels < 512; channels += 96) { 3384 DWConvMicrokernelTester() 3385 .cr(32) 3386 .kr(9) 3387 .channels(channels) 3388 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul16, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3389 } 3390 } 3391 TEST(QS8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL16,c_div_32_with_qmin)3392 TEST(QS8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL16, c_div_32_with_qmin) { 3393 TEST_REQUIRES_ARM_NEON; 3394 for (uint32_t channels = 64; channels < 512; channels += 96) { 3395 DWConvMicrokernelTester() 3396 .cr(32) 3397 .kr(9) 3398 .channels(channels) 3399 .qmin(128) 3400 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul16, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3401 } 3402 } 3403 TEST(QS8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL16,c_div_32_with_qmax)3404 TEST(QS8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL16, c_div_32_with_qmax) { 3405 TEST_REQUIRES_ARM_NEON; 3406 for (uint32_t channels = 64; channels < 512; channels += 96) { 3407 
DWConvMicrokernelTester() 3408 .cr(32) 3409 .kr(9) 3410 .channels(channels) 3411 .qmax(128) 3412 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul16, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3413 } 3414 } 3415 TEST(QS8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL16,c_lt_32)3416 TEST(QS8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL16, c_lt_32) { 3417 TEST_REQUIRES_ARM_NEON; 3418 for (uint32_t channels = 1; channels < 32; channels++) { 3419 DWConvMicrokernelTester() 3420 .cr(32) 3421 .kr(9) 3422 .channels(channels) 3423 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul16, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3424 } 3425 } 3426 TEST(QS8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL16,c_gt_32)3427 TEST(QS8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL16, c_gt_32) { 3428 TEST_REQUIRES_ARM_NEON; 3429 for (uint32_t channels = 33; channels < 64; channels++) { 3430 DWConvMicrokernelTester() 3431 .cr(32) 3432 .kr(9) 3433 .channels(channels) 3434 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul16, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3435 } 3436 } 3437 TEST(QS8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL16,c_gt_32_with_qmin)3438 TEST(QS8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL16, c_gt_32_with_qmin) { 3439 TEST_REQUIRES_ARM_NEON; 3440 for (uint32_t channels = 33; channels < 64; channels++) { 3441 DWConvMicrokernelTester() 3442 .cr(32) 3443 .kr(9) 3444 .channels(channels) 3445 .qmin(128) 3446 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul16, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3447 } 3448 } 3449 TEST(QS8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL16,c_gt_32_with_qmax)3450 TEST(QS8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL16, c_gt_32_with_qmax) { 3451 TEST_REQUIRES_ARM_NEON; 3452 for (uint32_t channels = 33; channels < 64; channels++) { 3453 DWConvMicrokernelTester() 3454 .cr(32) 3455 .kr(9) 3456 .channels(channels) 3457 .qmax(128) 3458 
.Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul16, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3459 } 3460 } 3461 TEST(QS8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL16,multipixel)3462 TEST(QS8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL16, multipixel) { 3463 TEST_REQUIRES_ARM_NEON; 3464 for (size_t channels = 1; channels <= 160; channels += 31) { 3465 DWConvMicrokernelTester() 3466 .cr(32) 3467 .kr(9) 3468 .channels(channels) 3469 .width(3) 3470 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul16, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3471 } 3472 } 3473 TEST(QS8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL16,multipixel_with_step)3474 TEST(QS8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL16, multipixel_with_step) { 3475 TEST_REQUIRES_ARM_NEON; 3476 for (size_t channels = 1; channels <= 160; channels += 31) { 3477 for (size_t step = 2; step <= 9; step++) { 3478 DWConvMicrokernelTester() 3479 .cr(32) 3480 .kr(9) 3481 .channels(channels) 3482 .width(3) 3483 .step(step) 3484 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul16, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3485 } 3486 } 3487 } 3488 TEST(QS8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL16,multipixel_with_output_stride)3489 TEST(QS8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL16, multipixel_with_output_stride) { 3490 TEST_REQUIRES_ARM_NEON; 3491 for (size_t channels = 1; channels <= 160; channels += 31) { 3492 DWConvMicrokernelTester() 3493 .cr(32) 3494 .kr(9) 3495 .channels(32) 3496 .width(5) 3497 .output_stride(163) 3498 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul16, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3499 } 3500 } 3501 TEST(QS8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL16,multipixel_with_qmin)3502 TEST(QS8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL16, multipixel_with_qmin) { 3503 TEST_REQUIRES_ARM_NEON; 3504 for (size_t channels = 1; channels <= 160; channels += 31) { 3505 DWConvMicrokernelTester() 
3506 .cr(32) 3507 .kr(9) 3508 .channels(channels) 3509 .width(3) 3510 .qmin(128) 3511 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul16, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3512 } 3513 } 3514 TEST(QS8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL16,multipixel_with_qmax)3515 TEST(QS8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL16, multipixel_with_qmax) { 3516 TEST_REQUIRES_ARM_NEON; 3517 for (size_t channels = 1; channels <= 160; channels += 31) { 3518 DWConvMicrokernelTester() 3519 .cr(32) 3520 .kr(9) 3521 .channels(channels) 3522 .width(3) 3523 .qmax(128) 3524 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul16, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3525 } 3526 } 3527 TEST(QS8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL16,input_offset)3528 TEST(QS8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL16, input_offset) { 3529 TEST_REQUIRES_ARM_NEON; 3530 for (uint32_t channels = 64; channels < 512; channels += 96) { 3531 DWConvMicrokernelTester() 3532 .cr(32) 3533 .kr(9) 3534 .channels(channels) 3535 .input_offset(592) 3536 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul16, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3537 } 3538 } 3539 TEST(QS8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL16,zero)3540 TEST(QS8_DWCONV_MINMAX_RNDNU_UP32X9__NEON_MUL16, zero) { 3541 TEST_REQUIRES_ARM_NEON; 3542 for (uint32_t mz = 0; mz < 9; mz++) { 3543 for (uint32_t channels = 64; channels < 512; channels += 96) { 3544 DWConvMicrokernelTester() 3545 .cr(32) 3546 .kr(9) 3547 .channels(channels) 3548 .input_offset(592) 3549 .zero_index(mz) 3550 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up32x9__neon_mul16, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3551 } 3552 } 3553 } 3554 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 3555 3556 3557 #if XNN_ARCH_ARM || XNN_ARCH_ARM64 TEST(QS8_DWCONV_MINMAX_RNDNU_UP32X25__NEON_MUL16,c_eq_32)3558 TEST(QS8_DWCONV_MINMAX_RNDNU_UP32X25__NEON_MUL16, c_eq_32) { 
3559 TEST_REQUIRES_ARM_NEON; 3560 DWConvMicrokernelTester() 3561 .cr(32) 3562 .kr(25) 3563 .channels(32) 3564 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul16, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3565 } 3566 TEST(QS8_DWCONV_MINMAX_RNDNU_UP32X25__NEON_MUL16,c_div_32)3567 TEST(QS8_DWCONV_MINMAX_RNDNU_UP32X25__NEON_MUL16, c_div_32) { 3568 TEST_REQUIRES_ARM_NEON; 3569 for (uint32_t channels = 64; channels < 512; channels += 96) { 3570 DWConvMicrokernelTester() 3571 .cr(32) 3572 .kr(25) 3573 .channels(channels) 3574 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul16, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3575 } 3576 } 3577 TEST(QS8_DWCONV_MINMAX_RNDNU_UP32X25__NEON_MUL16,c_div_32_with_qmin)3578 TEST(QS8_DWCONV_MINMAX_RNDNU_UP32X25__NEON_MUL16, c_div_32_with_qmin) { 3579 TEST_REQUIRES_ARM_NEON; 3580 for (uint32_t channels = 64; channels < 512; channels += 96) { 3581 DWConvMicrokernelTester() 3582 .cr(32) 3583 .kr(25) 3584 .channels(channels) 3585 .qmin(128) 3586 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul16, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3587 } 3588 } 3589 TEST(QS8_DWCONV_MINMAX_RNDNU_UP32X25__NEON_MUL16,c_div_32_with_qmax)3590 TEST(QS8_DWCONV_MINMAX_RNDNU_UP32X25__NEON_MUL16, c_div_32_with_qmax) { 3591 TEST_REQUIRES_ARM_NEON; 3592 for (uint32_t channels = 64; channels < 512; channels += 96) { 3593 DWConvMicrokernelTester() 3594 .cr(32) 3595 .kr(25) 3596 .channels(channels) 3597 .qmax(128) 3598 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul16, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3599 } 3600 } 3601 TEST(QS8_DWCONV_MINMAX_RNDNU_UP32X25__NEON_MUL16,c_lt_32)3602 TEST(QS8_DWCONV_MINMAX_RNDNU_UP32X25__NEON_MUL16, c_lt_32) { 3603 TEST_REQUIRES_ARM_NEON; 3604 for (uint32_t channels = 1; channels < 32; channels++) { 3605 DWConvMicrokernelTester() 3606 .cr(32) 3607 .kr(25) 3608 
.channels(channels) 3609 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul16, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3610 } 3611 } 3612 TEST(QS8_DWCONV_MINMAX_RNDNU_UP32X25__NEON_MUL16,c_gt_32)3613 TEST(QS8_DWCONV_MINMAX_RNDNU_UP32X25__NEON_MUL16, c_gt_32) { 3614 TEST_REQUIRES_ARM_NEON; 3615 for (uint32_t channels = 33; channels < 64; channels++) { 3616 DWConvMicrokernelTester() 3617 .cr(32) 3618 .kr(25) 3619 .channels(channels) 3620 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul16, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3621 } 3622 } 3623 TEST(QS8_DWCONV_MINMAX_RNDNU_UP32X25__NEON_MUL16,c_gt_32_with_qmin)3624 TEST(QS8_DWCONV_MINMAX_RNDNU_UP32X25__NEON_MUL16, c_gt_32_with_qmin) { 3625 TEST_REQUIRES_ARM_NEON; 3626 for (uint32_t channels = 33; channels < 64; channels++) { 3627 DWConvMicrokernelTester() 3628 .cr(32) 3629 .kr(25) 3630 .channels(channels) 3631 .qmin(128) 3632 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul16, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3633 } 3634 } 3635 TEST(QS8_DWCONV_MINMAX_RNDNU_UP32X25__NEON_MUL16,c_gt_32_with_qmax)3636 TEST(QS8_DWCONV_MINMAX_RNDNU_UP32X25__NEON_MUL16, c_gt_32_with_qmax) { 3637 TEST_REQUIRES_ARM_NEON; 3638 for (uint32_t channels = 33; channels < 64; channels++) { 3639 DWConvMicrokernelTester() 3640 .cr(32) 3641 .kr(25) 3642 .channels(channels) 3643 .qmax(128) 3644 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul16, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3645 } 3646 } 3647 TEST(QS8_DWCONV_MINMAX_RNDNU_UP32X25__NEON_MUL16,multipixel)3648 TEST(QS8_DWCONV_MINMAX_RNDNU_UP32X25__NEON_MUL16, multipixel) { 3649 TEST_REQUIRES_ARM_NEON; 3650 for (size_t channels = 1; channels <= 160; channels += 31) { 3651 DWConvMicrokernelTester() 3652 .cr(32) 3653 .kr(25) 3654 .channels(channels) 3655 .width(3) 3656 
.Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul16, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3657 } 3658 } 3659 TEST(QS8_DWCONV_MINMAX_RNDNU_UP32X25__NEON_MUL16,multipixel_with_step)3660 TEST(QS8_DWCONV_MINMAX_RNDNU_UP32X25__NEON_MUL16, multipixel_with_step) { 3661 TEST_REQUIRES_ARM_NEON; 3662 for (size_t channels = 1; channels <= 160; channels += 31) { 3663 for (size_t step = 2; step <= 25; step++) { 3664 DWConvMicrokernelTester() 3665 .cr(32) 3666 .kr(25) 3667 .channels(channels) 3668 .width(3) 3669 .step(step) 3670 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul16, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3671 } 3672 } 3673 } 3674 TEST(QS8_DWCONV_MINMAX_RNDNU_UP32X25__NEON_MUL16,multipixel_with_output_stride)3675 TEST(QS8_DWCONV_MINMAX_RNDNU_UP32X25__NEON_MUL16, multipixel_with_output_stride) { 3676 TEST_REQUIRES_ARM_NEON; 3677 for (size_t channels = 1; channels <= 160; channels += 31) { 3678 DWConvMicrokernelTester() 3679 .cr(32) 3680 .kr(25) 3681 .channels(32) 3682 .width(5) 3683 .output_stride(163) 3684 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul16, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3685 } 3686 } 3687 TEST(QS8_DWCONV_MINMAX_RNDNU_UP32X25__NEON_MUL16,multipixel_with_qmin)3688 TEST(QS8_DWCONV_MINMAX_RNDNU_UP32X25__NEON_MUL16, multipixel_with_qmin) { 3689 TEST_REQUIRES_ARM_NEON; 3690 for (size_t channels = 1; channels <= 160; channels += 31) { 3691 DWConvMicrokernelTester() 3692 .cr(32) 3693 .kr(25) 3694 .channels(channels) 3695 .width(3) 3696 .qmin(128) 3697 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul16, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3698 } 3699 } 3700 TEST(QS8_DWCONV_MINMAX_RNDNU_UP32X25__NEON_MUL16,multipixel_with_qmax)3701 TEST(QS8_DWCONV_MINMAX_RNDNU_UP32X25__NEON_MUL16, multipixel_with_qmax) { 3702 TEST_REQUIRES_ARM_NEON; 3703 for (size_t channels = 1; channels <= 
160; channels += 31) { 3704 DWConvMicrokernelTester() 3705 .cr(32) 3706 .kr(25) 3707 .channels(channels) 3708 .width(3) 3709 .qmax(128) 3710 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul16, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3711 } 3712 } 3713 TEST(QS8_DWCONV_MINMAX_RNDNU_UP32X25__NEON_MUL16,input_offset)3714 TEST(QS8_DWCONV_MINMAX_RNDNU_UP32X25__NEON_MUL16, input_offset) { 3715 TEST_REQUIRES_ARM_NEON; 3716 for (uint32_t channels = 64; channels < 512; channels += 96) { 3717 DWConvMicrokernelTester() 3718 .cr(32) 3719 .kr(25) 3720 .channels(channels) 3721 .input_offset(592) 3722 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul16, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3723 } 3724 } 3725 TEST(QS8_DWCONV_MINMAX_RNDNU_UP32X25__NEON_MUL16,zero)3726 TEST(QS8_DWCONV_MINMAX_RNDNU_UP32X25__NEON_MUL16, zero) { 3727 TEST_REQUIRES_ARM_NEON; 3728 for (uint32_t mz = 0; mz < 25; mz++) { 3729 for (uint32_t channels = 64; channels < 512; channels += 96) { 3730 DWConvMicrokernelTester() 3731 .cr(32) 3732 .kr(25) 3733 .channels(channels) 3734 .input_offset(592) 3735 .zero_index(mz) 3736 .Test(xnn_qs8_dwconv_minmax_rndnu_ukernel_up32x25__neon_mul16, xnn_init_qs8_conv_minmax_rndnu_neon_params, xnn_qs8_requantize_rndnu); 3737 } 3738 } 3739 } 3740 #endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 3741