/external/XNNPACK/src/f32-conv-hwc2chw/ |
D | 3x3s2p1c3x4-sse-1x1.c | 63 float* o0c3 = (float*) ((uintptr_t) o0c2 + output_channel_stride); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_1x1() local 72 o0c3 = o0c2; in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_1x1() 207 _mm_store_ss(o0c3, _mm_shuffle_ps(voc0123, voc0123, 3)); o0c3++; in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_1x1() 286 _mm_store_ss(o0c3, _mm_shuffle_ps(voc0123, voc0123, 3)); o0c3++; in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_1x1() 293 o0c3 = (float*) ((uintptr_t) o0c3 + output_channel_increment); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_1x1()
|
D | 3x3s2p1c3x4-neonfma-2x2.c | 79 float* o0c3 = (float*) ((uintptr_t) o0c2 + output_channel_stride); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2() local 91 o0c3 = o0c2; in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2() 349 vst1_f32(o0c3, vget_high_f32(vo0c23)); o0c3 += 2; in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2() 609 vst1_f32(o0c3, vget_high_f32(vo0c23)); o0c3 += 2; in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2() 621 vst1q_lane_f32(o0c3, vo0x0, 3); o0c3 += 1; in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2() 629 o0c3 = (float*) ((uintptr_t) o0c3 + output_channel_increment); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2()
|
D | 3x3s2p1c3x4-neon-2x2.c | 79 float* o0c3 = (float*) ((uintptr_t) o0c2 + output_channel_stride); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2() local 91 o0c3 = o0c2; in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2() 347 vst1_f32(o0c3, vget_high_f32(vo0c0123.val[1])); o0c3 += 2; in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2() 605 vst1_f32(o0c3, vget_high_f32(vo0c0123.val[1])); o0c3 += 2; in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2() 617 vst1q_lane_f32(o0c3, vo0x0, 3); o0c3 += 1; in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2() 625 o0c3 = (float*) ((uintptr_t) o0c3 + output_channel_increment); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2()
|
D | 3x3s2p1c3x4-scalar-1x1.c | 61 float* o0c3 = (float*) ((uintptr_t) o0c2 + output_channel_stride); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__scalar_1x1() local 70 o0c3 = o0c2; in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__scalar_1x1() 422 *o0c3++ = voc3; in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__scalar_1x1() 646 *o0c3++ = voc3; in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__scalar_1x1() 653 o0c3 = (float*) ((uintptr_t) o0c3 + output_channel_increment); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__scalar_1x1()
|
D | 3x3s2p1c3x4-wasmsimd-2x2.c | 79 float* o0c3 = (float*) ((uintptr_t) o0c2 + output_channel_stride); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2() local 91 o0c3 = o0c2; in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2() 348 …*((double*) o0c3) = wasm_f64x2_extract_lane(wasm_v32x4_shuffle(vo0c23, vo0c23, 2, 3, 2, 3), 0); o0… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2() 608 …*((double*) o0c3) = wasm_f64x2_extract_lane(wasm_v32x4_shuffle(vo0c23, vo0c23, 2, 3, 2, 3), 0); o0… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2() 619 *o0c3 = wasm_f32x4_extract_lane(vo0x0, 3); o0c3 += 1; in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2() 627 o0c3 = (float*) ((uintptr_t) o0c3 + output_channel_increment); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2()
|
D | 3x3s2p1c3x4-sse-2x2.c | 79 float* o0c3 = (float*) ((uintptr_t) o0c2 + output_channel_stride); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2() local 91 o0c3 = o0c2; in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2() 349 … _mm_storel_pi((__m64 *)o0c3, _mm_shuffle_ps(vo0c23, vo0c23, _MM_SHUFFLE(3, 2, 3, 2))); o0c3 += 2; in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2() 609 … _mm_storel_pi((__m64 *)o0c3, _mm_shuffle_ps(vo0c23, vo0c23, _MM_SHUFFLE(3, 2, 3, 2))); o0c3 += 2; in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2() 621 _mm_store_ss(o0c3, _mm_shuffle_ps(vo0x0, vo0x0, _MM_SHUFFLE(3, 3, 3, 3))); o0c3 += 1; in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2() 629 o0c3 = (float*) ((uintptr_t) o0c3 + output_channel_increment); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2()
|
/external/XNNPACK/src/f16-conv-hwc2chw/ |
D | 3x3s2p1c3x4-neonfp16arith-2x2.c | 79 __fp16* o0c3 = (__fp16*) ((uintptr_t) o0c2 + output_channel_stride); in xnn_f16_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfp16arith_2x2() local 91 o0c3 = o0c2; in xnn_f16_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfp16arith_2x2() 349 vst1_lane_u32((void*) o0c3, vreinterpret_u32_f16(vo0c23), 1); o0c3 += 2; in xnn_f16_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfp16arith_2x2() 610 vst1_lane_u32((void*) o0c3, vreinterpret_u32_f16(vo0c23), 1); o0c3 += 2; in xnn_f16_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfp16arith_2x2() 622 vst1_lane_f16(o0c3, vo0x0, 3); o0c3 += 1; in xnn_f16_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfp16arith_2x2() 630 o0c3 = (__fp16*) ((uintptr_t) o0c3 + output_channel_increment); in xnn_f16_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfp16arith_2x2()
|
/external/XNNPACK/src/amalgam/ |
D | sse.c | 587 float* o0c3 = (float*) ((uintptr_t) o0c2 + output_channel_stride); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2() local 599 o0c3 = o0c2; in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2() 857 … _mm_storel_pi((__m64 *)o0c3, _mm_shuffle_ps(vo0c23, vo0c23, _MM_SHUFFLE(3, 2, 3, 2))); o0c3 += 2; in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2() 1117 … _mm_storel_pi((__m64 *)o0c3, _mm_shuffle_ps(vo0c23, vo0c23, _MM_SHUFFLE(3, 2, 3, 2))); o0c3 += 2; in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2() 1129 _mm_store_ss(o0c3, _mm_shuffle_ps(vo0x0, vo0x0, _MM_SHUFFLE(3, 3, 3, 3))); o0c3 += 1; in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2() 1137 o0c3 = (float*) ((uintptr_t) o0c3 + output_channel_increment); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2()
|