/external/XNNPACK/src/f32-dwconv2d-chw/gen/
D  5x5p2-minmax-scalar-2x1.c  (all in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1)
    102  float vi4x2 = *i4++;   (local)
    155  vi4x1 = vi4x2;
    165  vo1p0 += vi4x2 * vk32;
    166  vo0p0 += vi4x2 * vk42;
    173  vi4x2 = vi4x3;
    250  vi4x1 = vi4x2;
    260  vo1p0 += vi4x2 * vk32;
    261  vo0p0 += vi4x2 * vk42;
    268  vi4x2 = vi4x3;
    325  vo1p0 += vi4x2 * vk32;
    [all …]

D  5x5p2-minmax-scalar-3x1.c  (all in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1)
    110  float vi4x2 = *i4++;   (local)
    177  vi4x1 = vi4x2;
    189  vo2p0 += vi4x2 * vk22;
    191  vo1p0 += vi4x2 * vk32;
    193  vo0p0 += vi4x2 * vk42;
    201  vi4x2 = vi4x3;
    304  vi4x1 = vi4x2;
    316  vo2p0 += vi4x2 * vk22;
    318  vo1p0 += vi4x2 * vk32;
    320  vo0p0 += vi4x2 * vk42;
    [all …]

D  5x5p2-minmax-scalar-1x1-acc4.c  (all in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc4)
     94  float vi4x2 = *i4++;   (local)
    133  vi4x1 = vi4x2;
    139  vo0p2 += vi4x2 * vk42;
    145  vi4x2 = vi4x3;
    199  vi4x1 = vi4x2;
    205  vo0p2 += vi4x2 * vk42;
    211  vi4x2 = vi4x3;
    249  vo0p2 += vi4x2 * vk42;

D  5x5p2-minmax-scalar-1x1-acc3.c  (all in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc3)
     94  float vi4x2 = *i4++;   (local)
    133  vi4x1 = vi4x2;
    139  vo0p2 += vi4x2 * vk42;
    145  vi4x2 = vi4x3;
    198  vi4x1 = vi4x2;
    204  vo0p2 += vi4x2 * vk42;
    210  vi4x2 = vi4x3;
    247  vo0p2 += vi4x2 * vk42;

D  5x5p2-minmax-scalar-1x1.c  (all in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1)
     94  float vi4x2 = *i4++;   (local)
    133  vi4x1 = vi4x2;
    139  vo0p0 += vi4x2 * vk42;
    145  vi4x2 = vi4x3;
    196  vi4x1 = vi4x2;
    202  vo0p0 += vi4x2 * vk42;
    208  vi4x2 = vi4x3;
    243  vo0p0 += vi4x2 * vk42;

D  5x5p2-minmax-scalar-1x1-acc2.c  (all in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc2)
     94  float vi4x2 = *i4++;   (local)
    133  vi4x1 = vi4x2;
    139  vo0p0 += vi4x2 * vk42;
    145  vi4x2 = vi4x3;
    197  vi4x1 = vi4x2;
    203  vo0p0 += vi4x2 * vk42;
    209  vi4x2 = vi4x3;
    245  vo0p0 += vi4x2 * vk42;

D  5x5p2-minmax-scalar-3x1-acc2.c  (all in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2)
    110  float vi4x2 = *i4++;   (local)
    177  vi4x1 = vi4x2;
    189  vo2p0 += vi4x2 * vk22;
    191  vo1p1 += vi4x2 * vk32;
    193  vo0p0 += vi4x2 * vk42;
    201  vi4x2 = vi4x3;
    307  vi4x1 = vi4x2;
    319  vo2p0 += vi4x2 * vk22;
    321  vo1p1 += vi4x2 * vk32;
    323  vo0p0 += vi4x2 * vk42;
    [all …]

D  5x5p2-minmax-scalar-2x1-acc3.c  (all in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3)
    102  float vi4x2 = *i4++;   (local)
    155  vi4x1 = vi4x2;
    165  vo1p1 += vi4x2 * vk32;
    166  vo0p2 += vi4x2 * vk42;
    173  vi4x2 = vi4x3;
    254  vi4x1 = vi4x2;
    264  vo1p1 += vi4x2 * vk32;
    265  vo0p2 += vi4x2 * vk42;
    272  vi4x2 = vi4x3;
    333  vo1p1 += vi4x2 * vk32;
    [all …]

D  5x5p2-minmax-scalar-2x1-acc2.c  (all in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2)
    102  float vi4x2 = *i4++;   (local)
    155  vi4x1 = vi4x2;
    165  vo1p1 += vi4x2 * vk32;
    166  vo0p0 += vi4x2 * vk42;
    173  vi4x2 = vi4x3;
    252  vi4x1 = vi4x2;
    262  vo1p1 += vi4x2 * vk32;
    263  vo0p0 += vi4x2 * vk42;
    270  vi4x2 = vi4x3;
    329  vo1p1 += vi4x2 * vk32;
    [all …]

D  5x5p2-minmax-scalar-1x1-acc5.c  (all in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc5)
     94  float vi4x2 = *i4++;   (local)
    133  vi4x1 = vi4x2;
    139  vo0p4 += vi4x2 * vk42;
    145  vi4x2 = vi4x3;
    200  vi4x1 = vi4x2;
    206  vo0p4 += vi4x2 * vk42;
    212  vi4x2 = vi4x3;
    251  vo0p4 += vi4x2 * vk42;

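The 5x5p2 (stride 1, padding 2) references above all show the same sliding-window pattern: each output column reads one new pixel of input row i4, multiplies the five in-flight window values by that row's kernel taps, and then shifts the window by one, which is where the indexed "vi4x1 = vi4x2;" and "vi4x2 = vi4x3;" lines come from. The sketch below illustrates that pattern for a single input row; the variable names mirror the listing, but the simplified framing (one output row, right-edge padding omitted) is an assumption, not the actual XNNPACK kernel.

/* Minimal sketch of the stride-1 window rotation in the 5x5p2 scalar kernels.
 * Illustrative only; not the exact XNNPACK code. */
static void dwconv5x5p2_row4_sketch(
    const float* i4,        /* input row 4 */
    float* out,             /* partial sums for one output row */
    size_t input_width,     /* pixels in the row (assumed >= 3) */
    float vk40, float vk41, float vk42, float vk43, float vk44)
{
  /* Left padding of 2: the first two window slots start as zero. */
  float vi4x0 = 0.0f;
  float vi4x1 = 0.0f;
  float vi4x2 = *i4++;      /* mirrors "float vi4x2 = *i4++;" in the listing */
  float vi4x3 = *i4++;
  /* Main columns only; the last two output columns, which need right padding,
   * are omitted from this sketch. */
  for (size_t x = 0; x + 2 < input_width; x++) {
    const float vi4x4 = *i4++;      /* one new pixel per output column (stride 1) */
    float vo0p0 = vi4x0 * vk40;     /* 5-tap contribution of row 4 */
    vo0p0 += vi4x1 * vk41;
    vo0p0 += vi4x2 * vk42;          /* e.g. "vo0p0 += vi4x2 * vk42;" at line 139 */
    vo0p0 += vi4x3 * vk43;
    vo0p0 += vi4x4 * vk44;
    /* Slide the window by one, producing the "vi4x1 = vi4x2;" and
     * "vi4x2 = vi4x3;" lines indexed above. */
    vi4x0 = vi4x1;
    vi4x1 = vi4x2;
    vi4x2 = vi4x3;
    vi4x3 = vi4x4;
    out[x] += vo0p0;   /* the other four input rows and the min/max clamp
                          are handled elsewhere in the real kernels */
  }
}
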
D  5x5s2p2-minmax-scalar-3x1-acc2.c  (all in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2)
    131  float vi4x2 = *i4++;   (local)
    188  vi4x0 = vi4x2;
    222  vo2p0 += vi4x2 * vk02;
    227  vo1p0 += vi4x2 * vk22;
    232  vo0p0 += vi4x2 * vk42;
    240  vi4x2 = vi4x4;
    339  vo2p0 += vi4x2 * vk02;
    344  vo1p0 += vi4x2 * vk22;
    349  vo0p0 += vi4x2 * vk42;
    419  vo2p0 += vi4x2 * vk02;
    [all …]

D  5x5s2p2-minmax-scalar-3x1.c  (all in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1)
    131  float vi4x2 = *i4++;   (local)
    188  vi4x0 = vi4x2;
    222  vo2p0 += vi4x2 * vk02;
    227  vo1p0 += vi4x2 * vk22;
    232  vo0p0 += vi4x2 * vk42;
    240  vi4x2 = vi4x4;
    336  vo2p0 += vi4x2 * vk02;
    341  vo1p0 += vi4x2 * vk22;
    346  vo0p0 += vi4x2 * vk42;
    413  vo2p0 += vi4x2 * vk02;
    [all …]

D  5x5s2p2-minmax-scalar-2x1-acc3.c  (all in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3)
    117  float vi4x2 = *i4++;   (local)
    161  vi4x0 = vi4x2;
    189  vo1p0 += vi4x2 * vk22;
    192  vo0p2 += vi4x2 * vk42;
    199  vi4x2 = vi4x4;
    275  vo1p0 += vi4x2 * vk22;
    278  vo0p2 += vi4x2 * vk42;
    333  vo1p0 += vi4x2 * vk22;
    336  vo0p2 += vi4x2 * vk42;

D  5x5s2p2-minmax-scalar-2x1-acc2.c  (all in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc2)
    117  float vi4x2 = *i4++;   (local)
    161  vi4x0 = vi4x2;
    189  vo1p0 += vi4x2 * vk22;
    192  vo0p0 += vi4x2 * vk42;
    199  vi4x2 = vi4x4;
    273  vo1p0 += vi4x2 * vk22;
    276  vo0p0 += vi4x2 * vk42;
    329  vo1p0 += vi4x2 * vk22;
    332  vo0p0 += vi4x2 * vk42;

D  5x5s2p2-minmax-scalar-2x1.c  (all in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1)
    117  float vi4x2 = *i4++;   (local)
    161  vi4x0 = vi4x2;
    189  vo1p0 += vi4x2 * vk22;
    192  vo0p0 += vi4x2 * vk42;
    199  vi4x2 = vi4x4;
    271  vo1p0 += vi4x2 * vk22;
    274  vo0p0 += vi4x2 * vk42;
    325  vo1p0 += vi4x2 * vk22;
    328  vo0p0 += vi4x2 * vk42;

D  5x5s2p2-minmax-scalar-1x1-acc4.c  (all in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc4)
    102  float vi4x2 = *i4++;   (local)
    133  vi4x0 = vi4x2;
    151  vo0p2 += vi4x2 * vk42;
    157  vi4x2 = vi4x4;
    204  vo0p2 += vi4x2 * vk42;
    238  vo0p2 += vi4x2 * vk42;

D  5x5s2p2-minmax-scalar-1x1.c  (all in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1)
    102  float vi4x2 = *i4++;   (local)
    133  vi4x0 = vi4x2;
    151  vo0p0 += vi4x2 * vk42;
    157  vi4x2 = vi4x4;
    201  vo0p0 += vi4x2 * vk42;
    232  vo0p0 += vi4x2 * vk42;

D  5x5s2p2-minmax-scalar-1x1-acc2.c  (all in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc2)
    102  float vi4x2 = *i4++;   (local)
    133  vi4x0 = vi4x2;
    151  vo0p0 += vi4x2 * vk42;
    157  vi4x2 = vi4x4;
    202  vo0p0 += vi4x2 * vk42;
    234  vo0p0 += vi4x2 * vk42;

D  5x5s2p2-minmax-scalar-1x1-acc3.c  (all in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc3)
    102  float vi4x2 = *i4++;   (local)
    133  vi4x0 = vi4x2;
    151  vo0p2 += vi4x2 * vk42;
    157  vi4x2 = vi4x4;
    203  vo0p2 += vi4x2 * vk42;
    236  vo0p2 += vi4x2 * vk42;

D  5x5s2p2-minmax-scalar-1x1-acc5.c  (all in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc5)
    102  float vi4x2 = *i4++;   (local)
    133  vi4x0 = vi4x2;
    151  vo0p4 += vi4x2 * vk42;
    157  vi4x2 = vi4x4;
    205  vo0p4 += vi4x2 * vk42;
    240  vo0p4 += vi4x2 * vk42;

D  3x3p1-minmax-scalar-5x1.c  (all in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1)
    103  const float vi4x2 = *i4++;   (local)
    151  vi4x1 = vi4x2;
    159  vo4p0 += vi4x2 * vk02;
    163  vo3p0 += vi4x2 * vk12;
    167  vo2p0 += vi4x2 * vk22;

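The 5x5s2p2 references differ from the 5x5p2 group only in the stride: two input pixels are consumed per output column, so the window advances by two, which yields the indexed "vi4x0 = vi4x2;" and "vi4x2 = vi4x4;" lines; the 3x3p1 entry uses the same stride-1 rotation as before, just with a three-wide window feeding several output rows. A stride-2 sketch under the same simplifying assumptions as above (single output row, names taken from the listing, right-edge handling omitted) follows.

/* Minimal sketch of the stride-2 window rotation in the 5x5s2p2 scalar kernels.
 * Illustrative only; not the exact XNNPACK code. */
static void dwconv5x5s2p2_row4_sketch(
    const float* i4, float* out, size_t input_width,
    float vk40, float vk41, float vk42, float vk43, float vk44)
{
  float vi4x0 = 0.0f;           /* left padding of 2 */
  float vi4x1 = 0.0f;
  float vi4x2 = *i4++;
  /* Full interior output columns only; columns needing right-edge zero
   * padding are omitted from this sketch. */
  for (size_t x = 0; 2 * x + 2 < input_width; x++) {
    const float vi4x3 = *i4++;  /* stride 2: two fresh pixels per output column */
    const float vi4x4 = *i4++;
    float vo0p0 = vi4x0 * vk40;
    vo0p0 += vi4x1 * vk41;
    vo0p0 += vi4x2 * vk42;      /* "vo0p0 += vi4x2 * vk42;" in the listing */
    vo0p0 += vi4x3 * vk43;
    vo0p0 += vi4x4 * vk44;
    /* Advance the window by two columns, giving the "vi4x0 = vi4x2;" and
     * "vi4x2 = vi4x4;" lines indexed above. */
    vi4x0 = vi4x2;
    vi4x1 = vi4x3;
    vi4x2 = vi4x4;
    out[x] += vo0p0;            /* row-4 contribution; other rows and the
                                   min/max clamp are applied elsewhere */
  }
}
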
/external/XNNPACK/src/f32-conv-hwc2chw/
D  3x3s2p1c3x4-wasmsimd-2x2.c  (all in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2)
    145  const v128_t vi4x2 = wasm_v128_load(i4); i4 += 4;   (local)
    164  …vo1x1 = wasm_f32x4_add(vo1x1, wasm_f32x4_mul(vk20c1, wasm_v32x4_shuffle(vi4x2, vi4x2, 0, 0, 0, 0))…
    185  …vo1x1 = wasm_f32x4_add(vo1x1, wasm_f32x4_mul(vk20c2, wasm_v32x4_shuffle(vi4x2, vi4x2, 1, 1, 1, 1))…
    206  …vo1x1 = wasm_f32x4_add(vo1x1, wasm_f32x4_mul(vk21c0, wasm_v32x4_shuffle(vi4x2, vi4x2, 2, 2, 2, 2))…
    227  …vo1x1 = wasm_f32x4_add(vo1x1, wasm_f32x4_mul(vk21c1, wasm_v32x4_shuffle(vi4x2, vi4x2, 3, 3, 3, 3))…
    295  …vo1x0 = wasm_f32x4_add(vo1x0, wasm_f32x4_mul(vk22c1, wasm_v32x4_shuffle(vi4x2, vi4x2, 0, 0, 0, 0))…
    316  …vo1x0 = wasm_f32x4_add(vo1x0, wasm_f32x4_mul(vk22c2, wasm_v32x4_shuffle(vi4x2, vi4x2, 1, 1, 1, 1))…
    398  v128_t vi4x2 = vzero;   (local)
    405  vi4x2 = wasm_v128_load(i4 + 4);
    425  …vo1x1 = wasm_f32x4_add(vo1x1, wasm_f32x4_mul(vk20c1, wasm_v32x4_shuffle(vi4x2, vi4x2, 0, 0, 0, 0))…
    [all …]

D  3x3s2p1c3x4-sse-2x2.c  (all in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2)
    144  const __m128 vi4x2 = _mm_loadu_ps(i4); i4 += 4;   (local)
    163  …vo1x1 = _mm_add_ps(vo1x1, _mm_mul_ps(vk20c1, _mm_shuffle_ps(vi4x2, vi4x2, _MM_SHUFFLE(0, 0, 0, 0))…
    184  …vo1x1 = _mm_add_ps(vo1x1, _mm_mul_ps(vk20c2, _mm_shuffle_ps(vi4x2, vi4x2, _MM_SHUFFLE(1, 1, 1, 1))…
    205  …vo1x1 = _mm_add_ps(vo1x1, _mm_mul_ps(vk21c0, _mm_shuffle_ps(vi4x2, vi4x2, _MM_SHUFFLE(2, 2, 2, 2))…
    226  …vo1x1 = _mm_add_ps(vo1x1, _mm_mul_ps(vk21c1, _mm_shuffle_ps(vi4x2, vi4x2, _MM_SHUFFLE(3, 3, 3, 3))…
    294  …vo1x0 = _mm_add_ps(vo1x0, _mm_mul_ps(vk22c1, _mm_shuffle_ps(vi4x2, vi4x2, _MM_SHUFFLE(0, 0, 0, 0))…
    315  …vo1x0 = _mm_add_ps(vo1x0, _mm_mul_ps(vk22c2, _mm_shuffle_ps(vi4x2, vi4x2, _MM_SHUFFLE(1, 1, 1, 1))…
    398  __m128 vi4x2 = _mm_setzero_ps();   (local)
    405  vi4x2 = _mm_loadu_ps(i4 + 4);
    425  …vo1x1 = _mm_add_ps(vo1x1, _mm_mul_ps(vk20c1, _mm_shuffle_ps(vi4x2, vi4x2, _MM_SHUFFLE(0, 0, 0, 0))…
    [all …]

D  3x3s2p1c3x4-neonfma-2x2.c  (all in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2)
    144  const float32x4_t vi4x2 = vld1q_f32(i4); i4 += 4;   (local)
    163  vo1x1 = vfmaq_laneq_f32(vo1x1, vk20c1, vi4x2, 0);
    184  vo1x1 = vfmaq_laneq_f32(vo1x1, vk20c2, vi4x2, 1);
    205  vo1x1 = vfmaq_laneq_f32(vo1x1, vk21c0, vi4x2, 2);
    226  vo1x1 = vfmaq_laneq_f32(vo1x1, vk21c1, vi4x2, 3);
    294  vo1x0 = vfmaq_laneq_f32(vo1x0, vk22c1, vi4x2, 0);
    315  vo1x0 = vfmaq_laneq_f32(vo1x0, vk22c2, vi4x2, 1);
    398  float32x4_t vi4x2 = vmovq_n_f32(0.0f);   (local)
    405  vi4x2 = vld1q_f32(i4 + 4);
    425  vo1x1 = vfmaq_laneq_f32(vo1x1, vk20c1, vi4x2, 0);
    [all …]

D  3x3s2p1c3x4-neon-2x2.c  (all in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2)
    144  const float32x4_t vi4x2 = vld1q_f32(i4); i4 += 4;   (local)
    163  vo1x1 = vmlaq_lane_f32(vo1x1, vk20c1, vget_low_f32(vi4x2), 0);
    184  vo1x1 = vmlaq_lane_f32(vo1x1, vk20c2, vget_low_f32(vi4x2), 1);
    205  vo1x1 = vmlaq_lane_f32(vo1x1, vk21c0, vget_high_f32(vi4x2), 0);
    226  vo1x1 = vmlaq_lane_f32(vo1x1, vk21c1, vget_high_f32(vi4x2), 1);
    294  vo1x0 = vmlaq_lane_f32(vo1x0, vk22c1, vget_low_f32(vi4x2), 0);
    315  vo1x0 = vmlaq_lane_f32(vo1x0, vk22c2, vget_low_f32(vi4x2), 1);
    396  float32x4_t vi4x2 = vmovq_n_f32(0.0f);   (local)
    403  vi4x2 = vld1q_f32(i4 + 4);
    423  vo1x1 = vmlaq_lane_f32(vo1x1, vk20c1, vget_low_f32(vi4x2), 0);
    [all …]

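The conv-hwc2chw entries use vi4x2 differently: it holds four consecutive floats of an interleaved (HWC, 3-channel) input row, so each lane is a single channel sample; the indexed lines broadcast one lane and multiply it by a vector of weights for four output channels, accumulating four output channels of one output pixel in vo1x1 or vo1x0. The sketch below shows that broadcast-and-accumulate step with SSE intrinsics to match the sse-2x2 file; the helper function and its signature are illustrative only, but the two accumulation expressions are taken directly from the listing.

#include <xmmintrin.h>

/* Illustrative helper (not part of XNNPACK): two of the broadcast-lane
 * multiply-accumulate steps indexed above for the 3x3s2p1c3x4 SSE kernel. */
static __m128 conv_hwc2chw_acc_sketch(
    __m128 vo1x1,    /* accumulator: 4 output channels of one output pixel */
    __m128 vi4x2,    /* 4 consecutive floats of the interleaved input row 4 */
    __m128 vk20c1,   /* weight vector (4 output channels) for one tap/channel */
    __m128 vk20c2)   /* weight vector for the next input channel of that tap */
{
  /* Lane 0 of vi4x2 is one channel sample; broadcast it and accumulate,
   * as at line 163 of 3x3s2p1c3x4-sse-2x2.c. */
  vo1x1 = _mm_add_ps(vo1x1,
      _mm_mul_ps(vk20c1, _mm_shuffle_ps(vi4x2, vi4x2, _MM_SHUFFLE(0, 0, 0, 0))));
  /* The next channel of the same pixel sits in lane 1 (line 184). */
  vo1x1 = _mm_add_ps(vo1x1,
      _mm_mul_ps(vk20c2, _mm_shuffle_ps(vi4x2, vi4x2, _MM_SHUFFLE(1, 1, 1, 1))));
  /* The NEON files express the same step as
   * vfmaq_laneq_f32(vo1x1, vk20c1, vi4x2, 0) and
   * vmlaq_lane_f32(vo1x1, vk20c1, vget_low_f32(vi4x2), 0). */
  return vo1x1;
}
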