/external/XNNPACK/src/f32-prelu/gen/

  wasm-2x4.c  (xnn_f32_prelu_ukernel__wasm_2x4)
      59  float vi1x3 = i1[3];  (local)
      76  float vacc1x3 = __builtin_wasm_max_f32(vi1x3, vzero);
      77  vi1x3 = __builtin_wasm_min_f32(vi1x3, vzero);
      86  vacc1x3 += vi1x3 * vw3;

  scalar-2x4.c  (xnn_f32_prelu_ukernel__scalar_2x4)
      58  const float vi1x3 = i1[3];  (local)
      68  const float vacc1x3 = XNN_UNPREDICTABLE(vi1x3 < 0.0f) ? vi1x3 * vw3 : vi1x3;
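The two PReLU entries above compute the same per-element function in two ways: the wasm kernel splits the value into a clamped-positive and a clamped-negative part and rescales only the latter, while the scalar kernel uses a predicated select. Below is a minimal C sketch of the branch-free form, with illustrative names (not the XNNPACK micro-kernel API):

    #include <stddef.h>

    // Sketch of the PReLU element pattern the 2x4-unrolled kernels apply.
    // x: inputs, w: per-channel slopes, y: outputs, n: channel count.
    static void prelu_row_sketch(const float* x, const float* w, float* y, size_t n) {
      for (size_t c = 0; c < n; c++) {
        const float v = x[c];
        const float pos = v > 0.0f ? v : 0.0f;  // __builtin_wasm_max_f32(v, 0)
        const float neg = v < 0.0f ? v : 0.0f;  // __builtin_wasm_min_f32(v, 0)
        y[c] = pos + neg * w[c];                // vacc += vneg * vw
      }
    }
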
/external/XNNPACK/src/f32-dwconv2d-chw/gen/

  5x5p2-minmax-scalar-1x1-acc4.c  (xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc4)
      99  float vi1x3 = *i1++;  (local)
     142  vi1x2 = vi1x3;
     148  vo0p0 += vi1x3 * vk13;
     154  vi1x3 = vi1x4;
     208  vi1x2 = vi1x3;
     214  vo0p0 += vi1x3 * vk13;

  5x5p2-minmax-scalar-1x1-acc3.c  (xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc3)
      99  float vi1x3 = *i1++;  (local)
     142  vi1x2 = vi1x3;
     148  vo0p1 += vi1x3 * vk13;
     154  vi1x3 = vi1x4;
     207  vi1x2 = vi1x3;
     213  vo0p1 += vi1x3 * vk13;

  5x5p2-minmax-scalar-1x1.c  (xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1)
      99  float vi1x3 = *i1++;  (local)
     142  vi1x2 = vi1x3;
     148  vo0p0 += vi1x3 * vk13;
     154  vi1x3 = vi1x4;
     205  vi1x2 = vi1x3;
     211  vo0p0 += vi1x3 * vk13;

  5x5p2-minmax-scalar-1x1-acc2.c  (xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc2)
      99  float vi1x3 = *i1++;  (local)
     142  vi1x2 = vi1x3;
     148  vo0p0 += vi1x3 * vk13;
     154  vi1x3 = vi1x4;
     206  vi1x2 = vi1x3;
     212  vo0p0 += vi1x3 * vk13;

  5x5p2-minmax-scalar-2x1.c  (xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1)
     108  float vi1x3 = *i1++;  (local)
     170  vi1x2 = vi1x3;
     177  vo1p0 += vi1x3 * vk03;
     178  vo0p0 += vi1x3 * vk13;
     188  vi1x3 = vi1x4;
     265  vi1x2 = vi1x3;
     272  vo1p0 += vi1x3 * vk03;
     273  vo0p0 += vi1x3 * vk13;

  5x5p2-minmax-scalar-1x1-acc5.c  (xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc5)
      99  float vi1x3 = *i1++;  (local)
     142  vi1x2 = vi1x3;
     148  vo0p1 += vi1x3 * vk13;
     154  vi1x3 = vi1x4;
     209  vi1x2 = vi1x3;
     215  vo0p1 += vi1x3 * vk13;

  5x5p2-minmax-scalar-2x1-acc3.c  (xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3)
     108  float vi1x3 = *i1++;  (local)
     170  vi1x2 = vi1x3;
     177  vo1p0 += vi1x3 * vk03;
     178  vo0p1 += vi1x3 * vk13;
     188  vi1x3 = vi1x4;
     269  vi1x2 = vi1x3;
     276  vo1p0 += vi1x3 * vk03;
     277  vo0p1 += vi1x3 * vk13;

  5x5p2-minmax-scalar-2x1-acc2.c  (xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2)
     108  float vi1x3 = *i1++;  (local)
     170  vi1x2 = vi1x3;
     177  vo1p1 += vi1x3 * vk03;
     178  vo0p0 += vi1x3 * vk13;
     188  vi1x3 = vi1x4;
     267  vi1x2 = vi1x3;
     274  vo1p1 += vi1x3 * vk03;
     275  vo0p0 += vi1x3 * vk13;
  5x5s2p2-minmax-scalar-1x1-acc4.c  (xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc4)
     107  const float vi1x3 = i1[0];  (local)
     142  vi1x1 = vi1x3;
     160  vo0p0 += vi1x3 * vk13;
     183  const float vi1x3 = *i1++;  (local)
     207  vo0p0 += vi1x3 * vk13;

  5x5s2p2-minmax-scalar-1x1.c  (xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1)
     107  const float vi1x3 = i1[0];  (local)
     142  vi1x1 = vi1x3;
     160  vo0p0 += vi1x3 * vk13;
     180  const float vi1x3 = *i1++;  (local)
     204  vo0p0 += vi1x3 * vk13;

  5x5s2p2-minmax-scalar-1x1-acc2.c  (xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc2)
     107  const float vi1x3 = i1[0];  (local)
     142  vi1x1 = vi1x3;
     160  vo0p0 += vi1x3 * vk13;
     181  const float vi1x3 = *i1++;  (local)
     205  vo0p0 += vi1x3 * vk13;

  5x5s2p2-minmax-scalar-1x1-acc3.c  (xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc3)
     107  const float vi1x3 = i1[0];  (local)
     142  vi1x1 = vi1x3;
     160  vo0p1 += vi1x3 * vk13;
     182  const float vi1x3 = *i1++;  (local)
     206  vo0p1 += vi1x3 * vk13;

  5x5s2p2-minmax-scalar-1x1-acc5.c  (xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc5)
     107  const float vi1x3 = i1[0];  (local)
     142  vi1x1 = vi1x3;
     160  vo0p1 += vi1x3 * vk13;
     184  const float vi1x3 = *i1++;  (local)
     208  vo0p1 += vi1x3 * vk13;

  5x5p2-minmax-scalar-3x1.c  (xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1)
     117  float vi1x3 = *i1++;  (local)
     198  vi1x2 = vi1x3;
     206  vo1p0 += vi1x3 * vk03;
     208  vo0p0 += vi1x3 * vk13;
     222  vi1x3 = vi1x4;
     325  vi1x2 = vi1x3;
     333  vo1p0 += vi1x3 * vk03;
     335  vo0p0 += vi1x3 * vk13;

  5x5p2-minmax-scalar-3x1-acc2.c  (xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2)
     117  float vi1x3 = *i1++;  (local)
     198  vi1x2 = vi1x3;
     206  vo1p1 += vi1x3 * vk03;
     208  vo0p0 += vi1x3 * vk13;
     222  vi1x3 = vi1x4;
     328  vi1x2 = vi1x3;
     336  vo1p1 += vi1x3 * vk03;
     338  vo0p0 += vi1x3 * vk13;

  5x5s2p2-minmax-scalar-2x1-acc3.c  (xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3)
     124  const float vi1x3 = i1[0];  (local)
     177  vi1x1 = vi1x3;
     205  vo0p1 += vi1x3 * vk13;
     241  const float vi1x3 = *i1++;  (local)
     283  vo0p1 += vi1x3 * vk13;

  5x5s2p2-minmax-scalar-2x1-acc2.c  (xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc2)
     124  const float vi1x3 = i1[0];  (local)
     177  vi1x1 = vi1x3;
     205  vo0p0 += vi1x3 * vk13;
     239  const float vi1x3 = *i1++;  (local)
     281  vo0p0 += vi1x3 * vk13;

  5x5s2p2-minmax-scalar-2x1.c  (xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1)
     124  const float vi1x3 = i1[0];  (local)
     177  vi1x1 = vi1x3;
     205  vo0p0 += vi1x3 * vk13;
     237  const float vi1x3 = *i1++;  (local)
     279  vo0p0 += vi1x3 * vk13;

  5x5s2p2-minmax-scalar-3x1-acc2.c  (xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2)
     140  const float vi1x3 = i1[0];  (local)
     211  vi1x1 = vi1x3;
     249  vo0p0 += vi1x3 * vk13;
     296  const float vi1x3 = *i1++;  (local)
     356  vo0p0 += vi1x3 * vk13;
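The dwconv2d-chw entries above all show the same scalar sliding-window idiom: each input row keeps a 5-wide window of samples in locals (vi1x0..vi1x4), every tap is accumulated into one or more partial sums (vo0p0, vo0p1, ...), and the window is then rotated so only the freshly needed samples are reloaded. In the stride-1 (5x5p2) kernels the shift is by one column (vi1x2 = vi1x3; vi1x3 = vi1x4), in the stride-2 (5x5s2p2) kernels by two (vi1x1 = vi1x3). A minimal C sketch of that idiom, with illustrative names rather than the generated kernels' code:

    // Sketch of the 5-tap row accumulation; win, k and the function names
    // are placeholders, not the generated kernels' variables.
    static float dot5(const float win[5], const float k[5]) {
      float acc = 0.0f;
      for (int t = 0; t < 5; t++) {
        acc += win[t] * k[t];     // e.g. vo0p0 += vi1x3 * vk13 (tap t == 3)
      }
      return acc;
    }

    // Stride 1 (5x5p2): shift the window left by one, load one new value.
    static void shift1(float win[5], float next) {
      for (int t = 0; t < 4; t++) {
        win[t] = win[t + 1];      // e.g. vi1x2 = vi1x3; vi1x3 = vi1x4;
      }
      win[4] = next;              // the single fresh load (*i1++)
    }

    // Stride 2 (5x5s2p2): shift by two, load two new values per output column.
    static void shift2(float win[5], float next0, float next1) {
      win[0] = win[2];
      win[1] = win[3];            // e.g. vi1x1 = vi1x3;
      win[2] = win[4];
      win[3] = next0;
      win[4] = next1;
    }
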
/external/XNNPACK/src/f32-conv-hwc2chw/

  3x3s2p1c3x4-wasmsimd-2x2.c  (xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2)
     233  const v128_t vi1x3 = wasm_v128_load(i1); i1 += 4;  (local)
     247  …vo0x1 = wasm_f32x4_add(vo0x1, wasm_f32x4_mul(vk11c2, wasm_v32x4_shuffle(vi1x3, vi1x3, 0, 0, 0, 0))…
     268  …vo0x1 = wasm_f32x4_add(vo0x1, wasm_f32x4_mul(vk12c0, wasm_v32x4_shuffle(vi1x3, vi1x3, 1, 1, 1, 1))…
     289  …vo0x1 = wasm_f32x4_add(vo0x1, wasm_f32x4_mul(vk12c1, wasm_v32x4_shuffle(vi1x3, vi1x3, 2, 2, 2, 2))…
     310  …vo0x1 = wasm_f32x4_add(vo0x1, wasm_f32x4_mul(vk12c2, wasm_v32x4_shuffle(vi1x3, vi1x3, 3, 3, 3, 3))…
     321  vi1x0 = vi1x3;
     505  v128_t vi1x3 = vzero;  (local)
     512  vi1x3 = wasm_v32x4_load_splat(i1 + 8);
     527  …vo0x1 = wasm_f32x4_add(vo0x1, wasm_f32x4_mul(vk11c2, wasm_v32x4_shuffle(vi1x3, vi1x3, 0, 0, 0, 0))…

  3x3s2p1c3x4-sse-2x2.c  (xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2)
     232  const __m128 vi1x3 = _mm_loadu_ps(i1); i1 += 4;  (local)
     246  …vo0x1 = _mm_add_ps(vo0x1, _mm_mul_ps(vk11c2, _mm_shuffle_ps(vi1x3, vi1x3, _MM_SHUFFLE(0, 0, 0, 0))…
     267  …vo0x1 = _mm_add_ps(vo0x1, _mm_mul_ps(vk12c0, _mm_shuffle_ps(vi1x3, vi1x3, _MM_SHUFFLE(1, 1, 1, 1))…
     288  …vo0x1 = _mm_add_ps(vo0x1, _mm_mul_ps(vk12c1, _mm_shuffle_ps(vi1x3, vi1x3, _MM_SHUFFLE(2, 2, 2, 2))…
     309  …vo0x1 = _mm_add_ps(vo0x1, _mm_mul_ps(vk12c2, _mm_shuffle_ps(vi1x3, vi1x3, _MM_SHUFFLE(3, 3, 3, 3))…
     320  vi1x0 = vi1x3;
     505  __m128 vi1x3 = _mm_setzero_ps();  (local)
     512  vi1x3 = _mm_load_ss(i1 + 8);
     527  …vo0x1 = _mm_add_ps(vo0x1, _mm_mul_ps(vk11c2, _mm_shuffle_ps(vi1x3, vi1x3, _MM_SHUFFLE(0, 0, 0, 0))…

  3x3s2p1c3x4-neonfma-2x2.c  (xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2)
     232  const float32x4_t vi1x3 = vld1q_f32(i1); i1 += 4;  (local)
     246  vo0x1 = vfmaq_laneq_f32(vo0x1, vk11c2, vi1x3, 0);
     267  vo0x1 = vfmaq_laneq_f32(vo0x1, vk12c0, vi1x3, 1);
     288  vo0x1 = vfmaq_laneq_f32(vo0x1, vk12c1, vi1x3, 2);
     309  vo0x1 = vfmaq_laneq_f32(vo0x1, vk12c2, vi1x3, 3);
     320  vi1x0 = vi1x3;
     505  float32x4_t vi1x3 = vmovq_n_f32(0.0f);  (local)
     512  vi1x3 = vld1q_lane_f32(i1 + 8, vi1x3, 0);
     527  vo0x1 = vfmaq_laneq_f32(vo0x1, vk11c2, vi1x3, 0);

  3x3s2p1c3x4-neon-2x2.c  (xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2)
     232  const float32x4_t vi1x3 = vld1q_f32(i1); i1 += 4;  (local)
     246  vo0x1 = vmlaq_lane_f32(vo0x1, vk11c2, vget_low_f32(vi1x3), 0);
     267  vo0x1 = vmlaq_lane_f32(vo0x1, vk12c0, vget_low_f32(vi1x3), 1);
     288  vo0x1 = vmlaq_lane_f32(vo0x1, vk12c1, vget_high_f32(vi1x3), 0);
     309  vo0x1 = vmlaq_lane_f32(vo0x1, vk12c2, vget_high_f32(vi1x3), 1);
     320  vi1x0 = vi1x3;
     503  float32x4_t vi1x3 = vmovq_n_f32(0.0f);  (local)
     510  vi1x3 = vld1q_lane_f32(i1 + 8, vi1x3, 0);
     525  vo0x1 = vmlaq_lane_f32(vo0x1, vk11c2, vget_low_f32(vi1x3), 0);
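In the conv-hwc2chw entries above, vi1x3 holds four consecutive input floats, and each multiply-accumulate broadcasts a single lane of it (one channel of one input pixel) against a vector of four output-channel weights such as vk11c2 or vk12c0: a shuffle-splat on SSE/wasmsimd, a lane-indexed FMA on NEON. A minimal SSE sketch of that broadcast-lane step, with placeholder names (acc, vk, vin are not the kernel's variables):

    #include <xmmintrin.h>

    // Splat lane 0 of vin and multiply-accumulate it against the weight
    // vector vk, mirroring
    //   _mm_add_ps(vo0x1, _mm_mul_ps(vk11c2,
    //              _mm_shuffle_ps(vi1x3, vi1x3, _MM_SHUFFLE(0, 0, 0, 0))))
    // in the listing; NEON expresses the same step as vmlaq_lane_f32 /
    // vfmaq_laneq_f32 with a lane index instead of a shuffle.
    static inline __m128 mac_lane0(__m128 acc, __m128 vk, __m128 vin) {
      const __m128 splat = _mm_shuffle_ps(vin, vin, _MM_SHUFFLE(0, 0, 0, 0));
      return _mm_add_ps(acc, _mm_mul_ps(vk, splat));
    }
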