/external/XNNPACK/src/x8-zip/
D | xm-sse2.c |
    51  __m128i vxyzw1 = _mm_unpackhi_epi16(vxy_lo, vzw_lo);                in xnn_x8_zip_xm_ukernel__sse2() local
    67  unaligned_store_u32(output, (uint32_t) _mm_cvtsi128_si32(vxyzw1));  in xnn_x8_zip_xm_ukernel__sse2()
    69  vxyzw1 = _mm_shufflelo_epi16(vxyzw1, _MM_SHUFFLE(3, 2, 3, 2));      in xnn_x8_zip_xm_ukernel__sse2()
    70  unaligned_store_u32(output, (uint32_t) _mm_cvtsi128_si32(vxyzw1));  in xnn_x8_zip_xm_ukernel__sse2()
    72  vxyzw1 = _mm_unpackhi_epi64(vxyzw1, vxyzw1);                        in xnn_x8_zip_xm_ukernel__sse2()
    73  unaligned_store_u32(output, (uint32_t) _mm_cvtsi128_si32(vxyzw1));  in xnn_x8_zip_xm_ukernel__sse2()
    75  vxyzw1 = _mm_shufflelo_epi16(vxyzw1, _MM_SHUFFLE(3, 2, 3, 2));      in xnn_x8_zip_xm_ukernel__sse2()
    76  unaligned_store_u32(output, (uint32_t) _mm_cvtsi128_si32(vxyzw1));  in xnn_x8_zip_xm_ukernel__sse2()
   116  __m128i vxyzw1 = _mm_unpackhi_epi16(vxy, vzw);                      in xnn_x8_zip_xm_ukernel__sse2() local
   130  unaligned_store_u32(output, (uint32_t) _mm_cvtsi128_si32(vxyzw1));  in xnn_x8_zip_xm_ukernel__sse2()
  [all …]
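The hits above come from the variable-stream (xm) kernel's tail path: one 128-bit register holds four interleaved x-y-z-w quadruples, and the kernel drains it four bytes at a time. A minimal standalone sketch of that drain sequence follows; store_u32 is a stand-in for XNNPACK's unaligned_store_u32 helper (assumed to be a plain unaligned 32-bit store), and the real kernel's output-pointer striding is omitted.

#include <emmintrin.h>  /* SSE2 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Stand-in for XNNPACK's unaligned_store_u32 helper (assumed semantics:
 * a plain unaligned 32-bit store). */
static void store_u32(void* address, uint32_t value) {
  memcpy(address, &value, sizeof(value));
}

int main(void) {
  /* Interleaved quadruples: x0 y0 z0 w0 | x1 y1 z1 w1 | x2 ... | x3 ... */
  const uint8_t data[16] = { 'x', 'y', 'z', 'w',  'X', 'Y', 'Z', 'W',
                             'a', 'b', 'c', 'd',  'A', 'B', 'C', 'D' };
  const __m128i vxyzw = _mm_loadu_si128((const __m128i*) data);

  uint8_t out[4][4];
  /* Group 0: the lowest 32 bits. */
  store_u32(out[0], (uint32_t) _mm_cvtsi128_si32(vxyzw));
  /* Group 1: move 16-bit lanes 2,3 down to lanes 0,1, then take the low 32 bits. */
  __m128i v = _mm_shufflelo_epi16(vxyzw, _MM_SHUFFLE(3, 2, 3, 2));
  store_u32(out[1], (uint32_t) _mm_cvtsi128_si32(v));
  /* Groups 2 and 3: bring the high 64 bits down, then repeat the same two steps. */
  v = _mm_unpackhi_epi64(vxyzw, vxyzw);
  store_u32(out[2], (uint32_t) _mm_cvtsi128_si32(v));
  v = _mm_shufflelo_epi16(v, _MM_SHUFFLE(3, 2, 3, 2));
  store_u32(out[3], (uint32_t) _mm_cvtsi128_si32(v));

  for (int i = 0; i < 4; i++) {
    printf("%.4s\n", (const char*) out[i]);  /* xyzw / XYZW / abcd / ABCD */
  }
  return 0;
}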
D | x4-sse2.c |
    40  const __m128i vxyzw1 = _mm_unpackhi_epi16(vxy_lo, vzw_lo);  in xnn_x8_zip_x4_ukernel__sse2() local
    44  _mm_storeu_si128((__m128i*) o + 1, vxyzw1);                 in xnn_x8_zip_x4_ukernel__sse2()
    61  const __m128i vxyzw1 = _mm_unpackhi_epi16(vxy_lo, vzw_lo);  in xnn_x8_zip_x4_ukernel__sse2() local
    66  _mm_storeu_si128((__m128i*) o + 1, vxyzw1);                 in xnn_x8_zip_x4_ukernel__sse2()
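In the fixed four-stream (x4) kernel, vxyzw1 is the second of four output registers produced by a two-stage interleave: bytes from the x/y and z/w streams are zipped into 16-bit pairs, and the pairs are then zipped into 32-bit quadruples. A compilable sketch of one 16-element step, with made-up input values (the kernel's pointer bookkeeping is left out):

#include <emmintrin.h>  /* SSE2 */
#include <stdint.h>
#include <stdio.h>

int main(void) {
  uint8_t x[16], y[16], z[16], w[16];
  for (int i = 0; i < 16; i++) {
    x[i] = (uint8_t) i;
    y[i] = (uint8_t) (0x10 + i);
    z[i] = (uint8_t) (0x20 + i);
    w[i] = (uint8_t) (0x30 + i);
  }

  const __m128i vx = _mm_loadu_si128((const __m128i*) x);
  const __m128i vy = _mm_loadu_si128((const __m128i*) y);
  const __m128i vz = _mm_loadu_si128((const __m128i*) z);
  const __m128i vw = _mm_loadu_si128((const __m128i*) w);

  /* Stage 1: interleave bytes into 16-bit pairs x0 y0 x1 y1 ... and z0 w0 z1 w1 ... */
  const __m128i vxy_lo = _mm_unpacklo_epi8(vx, vy);
  const __m128i vxy_hi = _mm_unpackhi_epi8(vx, vy);
  const __m128i vzw_lo = _mm_unpacklo_epi8(vz, vw);
  const __m128i vzw_hi = _mm_unpackhi_epi8(vz, vw);

  /* Stage 2: interleave pairs into 32-bit quadruples x0 y0 z0 w0 x1 y1 z1 w1 ... */
  const __m128i vxyzw0 = _mm_unpacklo_epi16(vxy_lo, vzw_lo);
  const __m128i vxyzw1 = _mm_unpackhi_epi16(vxy_lo, vzw_lo);  /* elements 4..7 */
  const __m128i vxyzw2 = _mm_unpacklo_epi16(vxy_hi, vzw_hi);
  const __m128i vxyzw3 = _mm_unpackhi_epi16(vxy_hi, vzw_hi);

  uint8_t o[64];
  _mm_storeu_si128((__m128i*) o + 0, vxyzw0);
  _mm_storeu_si128((__m128i*) o + 1, vxyzw1);
  _mm_storeu_si128((__m128i*) o + 2, vxyzw2);
  _mm_storeu_si128((__m128i*) o + 3, vxyzw3);

  for (int i = 0; i < 8; i++) printf("%02x ", o[i]);  /* 00 10 20 30 01 11 21 31 */
  printf("\n");
  return 0;
}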
D | xm-neon.c |
    91  uint32x2_t vxyzw1 = vreinterpret_u32_u16(vxyzw_lo.val[1]);  in xnn_x8_zip_xm_ukernel__neon() local
   102  vst1_lane_u32((void*) output, vxyzw1, 0);                   in xnn_x8_zip_xm_ukernel__neon()
   105  vst1_lane_u32((void*) output, vxyzw1, 1);                   in xnn_x8_zip_xm_ukernel__neon()
   109  vxyzw1 = vxyzw3;                                            in xnn_x8_zip_xm_ukernel__neon()
   119  vxyzw0 = vxyzw1;                                            in xnn_x8_zip_xm_ukernel__neon()
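The NEON variant does the same two-stage zip on 64-bit d-registers, then reinterprets each half as two 32-bit lanes so that vst1_lane_u32 can write one x-y-z-w quadruple at a time, which is what the xm kernel needs in order to advance the output pointer by a runtime stride between groups. A sketch under those assumptions (the stride handling itself is omitted):

#include <arm_neon.h>
#include <stdint.h>
#include <stdio.h>

int main(void) {
  uint8_t x[8], y[8], z[8], w[8];
  for (int i = 0; i < 8; i++) {
    x[i] = (uint8_t) i;
    y[i] = (uint8_t) (0x10 + i);
    z[i] = (uint8_t) (0x20 + i);
    w[i] = (uint8_t) (0x30 + i);
  }

  const uint8x8_t vx = vld1_u8(x);
  const uint8x8_t vy = vld1_u8(y);
  const uint8x8_t vz = vld1_u8(z);
  const uint8x8_t vw = vld1_u8(w);

  /* Stage 1: byte zips give x0 y0 x1 y1 ... and z0 w0 z1 w1 ... */
  const uint8x8x2_t vxy = vzip_u8(vx, vy);
  const uint8x8x2_t vzw = vzip_u8(vz, vw);

  /* Stage 2: 16-bit zips give x0 y0 z0 w0 x1 y1 z1 w1 per 64-bit half. */
  const uint16x4x2_t vxyzw_lo = vzip_u16(vreinterpret_u16_u8(vxy.val[0]),
                                         vreinterpret_u16_u8(vzw.val[0]));

  /* Each 32-bit lane now holds one complete quadruple; store lane by lane. */
  const uint32x2_t vxyzw0 = vreinterpret_u32_u16(vxyzw_lo.val[0]);
  const uint32x2_t vxyzw1 = vreinterpret_u32_u16(vxyzw_lo.val[1]);

  uint32_t out[4];
  vst1_lane_u32(&out[0], vxyzw0, 0);
  vst1_lane_u32(&out[1], vxyzw0, 1);
  vst1_lane_u32(&out[2], vxyzw1, 0);
  vst1_lane_u32(&out[3], vxyzw1, 1);

  const uint8_t* o = (const uint8_t*) out;
  for (int i = 0; i < 8; i++) printf("%02x ", o[i]);  /* 00 10 20 30 01 11 21 31 */
  printf("\n");
  return 0;
}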
/external/XNNPACK/src/x32-zip/
D | x4-wasmsimd.c |
    43  const v128_t vxyzw1 = wasm_v32x4_shuffle(vxy_lo, vzw_lo, 2, 3, 6, 7);  in xnn_x32_zip_x4_ukernel__wasmsimd() local
    48  wasm_v128_store(o + 4, vxyzw1);                                        in xnn_x32_zip_x4_ukernel__wasmsimd()
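On WebAssembly SIMD there are no unpack instructions, so the same interleave is spelled with wasm_v32x4_shuffle, where lane indices 0-3 select from the first operand and 4-7 from the second; the (2, 3, 6, 7) pattern in the hit is exactly the high-halves combination. The intermediate shuffles below are a plausible reconstruction of the surrounding kernel, not lifted from the file, and zip4_once is a name invented here:

/* Compile with: clang --target=wasm32 -msimd128 (or emcc -msimd128). */
#include <wasm_simd128.h>
#include <stdint.h>

void zip4_once(const uint32_t* x, const uint32_t* y,
               const uint32_t* z, const uint32_t* w, uint32_t* o) {
  const v128_t vx = wasm_v128_load(x);
  const v128_t vy = wasm_v128_load(y);
  const v128_t vz = wasm_v128_load(z);
  const v128_t vw = wasm_v128_load(w);

  /* Pair up 32-bit lanes: vxy_lo = x0 y0 x1 y1, vzw_lo = z0 w0 z1 w1. */
  const v128_t vxy_lo = wasm_v32x4_shuffle(vx, vy, 0, 4, 1, 5);
  const v128_t vxy_hi = wasm_v32x4_shuffle(vx, vy, 2, 6, 3, 7);
  const v128_t vzw_lo = wasm_v32x4_shuffle(vz, vw, 0, 4, 1, 5);
  const v128_t vzw_hi = wasm_v32x4_shuffle(vz, vw, 2, 6, 3, 7);

  /* Combine pairs into quadruples. Indices 4..7 pick from the second
   * operand, so (2, 3, 6, 7) takes x1 y1 from vxy_lo and z1 w1 from
   * vzw_lo -- the vxyzw1 seen in the search hit. */
  const v128_t vxyzw0 = wasm_v32x4_shuffle(vxy_lo, vzw_lo, 0, 1, 4, 5);
  const v128_t vxyzw1 = wasm_v32x4_shuffle(vxy_lo, vzw_lo, 2, 3, 6, 7);
  const v128_t vxyzw2 = wasm_v32x4_shuffle(vxy_hi, vzw_hi, 0, 1, 4, 5);
  const v128_t vxyzw3 = wasm_v32x4_shuffle(vxy_hi, vzw_hi, 2, 3, 6, 7);

  wasm_v128_store(o, vxyzw0);
  wasm_v128_store(o + 4, vxyzw1);
  wasm_v128_store(o + 8, vxyzw2);
  wasm_v128_store(o + 12, vxyzw3);
}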
D | x4-sse2.c |
    43  const __m128i vxyzw1 = _mm_unpackhi_epi64(vxy_lo, vzw_lo);  in xnn_x32_zip_x4_ukernel__sse2() local
    48  _mm_storeu_si128((__m128i*) (o + 4), vxyzw1);               in xnn_x32_zip_x4_ukernel__sse2()
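The SSE2 counterpart of the 32-bit zip uses the unpack ladder directly: _mm_unpacklo/hi_epi32 builds the pairs and _mm_unpacklo/hi_epi64 builds the quadruples, with vxyzw1 again the high-half combination stored at o + 4. A sketch of one full step (zip4_once is an illustrative name, not the kernel's):

#include <emmintrin.h>  /* SSE2 */
#include <stdint.h>

void zip4_once(const uint32_t* x, const uint32_t* y,
               const uint32_t* z, const uint32_t* w, uint32_t* o) {
  const __m128i vx = _mm_loadu_si128((const __m128i*) x);
  const __m128i vy = _mm_loadu_si128((const __m128i*) y);
  const __m128i vz = _mm_loadu_si128((const __m128i*) z);
  const __m128i vw = _mm_loadu_si128((const __m128i*) w);

  /* 32-bit interleave: vxy_lo = x0 y0 x1 y1, vzw_lo = z0 w0 z1 w1. */
  const __m128i vxy_lo = _mm_unpacklo_epi32(vx, vy);
  const __m128i vxy_hi = _mm_unpackhi_epi32(vx, vy);
  const __m128i vzw_lo = _mm_unpacklo_epi32(vz, vw);
  const __m128i vzw_hi = _mm_unpackhi_epi32(vz, vw);

  /* 64-bit interleave: vxyzw1 = x1 y1 z1 w1, matching the hit at line 43. */
  const __m128i vxyzw0 = _mm_unpacklo_epi64(vxy_lo, vzw_lo);
  const __m128i vxyzw1 = _mm_unpackhi_epi64(vxy_lo, vzw_lo);
  const __m128i vxyzw2 = _mm_unpacklo_epi64(vxy_hi, vzw_hi);
  const __m128i vxyzw3 = _mm_unpackhi_epi64(vxy_hi, vzw_hi);

  _mm_storeu_si128((__m128i*) o, vxyzw0);
  _mm_storeu_si128((__m128i*) (o + 4), vxyzw1);
  _mm_storeu_si128((__m128i*) (o + 8), vxyzw2);
  _mm_storeu_si128((__m128i*) (o + 12), vxyzw3);
}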
D | xm-wasmsimd.c |
    57  const v128_t vxyzw1 = wasm_v32x4_shuffle(vxy_lo, vzw_lo, 2, 3, 6, 7);  in xnn_x32_zip_xm_ukernel__wasmsimd() local
    64  wasm_v128_store(o, vxyzw1);                                            in xnn_x32_zip_xm_ukernel__wasmsimd()
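The xm variant performs the same shuffles as the x4 sketch above, but because more than four streams may be interleaved, each quadruple group is written at the current output position and the pointer is then bumped by a runtime stride rather than a fixed 4 elements. A hypothetical helper illustrating just that store pattern (store_quads_strided and group_stride are names invented here, not the kernel's):

/* Compile with: clang --target=wasm32 -msimd128. */
#include <wasm_simd128.h>
#include <stdint.h>
#include <stddef.h>

/* Write four already-zipped quadruples, advancing the output by
 * group_stride 32-bit elements between groups, the way a variable-stream
 * kernel must when m > 4 leaves room for further streams in each group. */
void store_quads_strided(v128_t vxyzw0, v128_t vxyzw1,
                         v128_t vxyzw2, v128_t vxyzw3,
                         uint32_t* o, size_t group_stride) {
  wasm_v128_store(o, vxyzw0);
  o += group_stride;
  wasm_v128_store(o, vxyzw1);
  o += group_stride;
  wasm_v128_store(o, vxyzw2);
  o += group_stride;
  wasm_v128_store(o, vxyzw3);
}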
D | xm-sse2.c |
    56  const __m128i vxyzw1 = _mm_unpackhi_epi64(vxy_lo, vzw_lo);  in xnn_x32_zip_xm_ukernel__sse2() local
    63  _mm_storeu_si128((__m128i*) output, vxyzw1);                in xnn_x32_zip_xm_ukernel__sse2()
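Here too the full 16-byte groups go out with _mm_storeu_si128. When a tail leaves fewer than four 32-bit elements per group, kernels of this shape typically fall back to the narrowing-store idiom sketched below (an illustration of the common pattern, not this file's exact remainder code; store_remainder_u32 is a name invented here):

#include <emmintrin.h>  /* SSE2 */
#include <stdint.h>
#include <stddef.h>
#include <string.h>

/* Store the lowest n (0..3) 32-bit lanes of v to out; full vectors are
 * assumed to have been handled with _mm_storeu_si128 already. */
static void store_remainder_u32(uint32_t* out, __m128i v, size_t n) {
  if (n & 2) {
    _mm_storel_epi64((__m128i*) out, v);  /* low two 32-bit lanes */
    v = _mm_unpackhi_epi64(v, v);         /* bring the upper half down */
    out += 2;
  }
  if (n & 1) {
    const uint32_t lo = (uint32_t) _mm_cvtsi128_si32(v);
    memcpy(out, &lo, sizeof(lo));         /* one lane, unaligned-safe */
  }
}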
/external/XNNPACK/src/amalgam/
D | sse2.c |
  13062  const __m128i vxyzw1 = _mm_unpackhi_epi64(vxy_lo, vzw_lo);          in xnn_x32_zip_x4_ukernel__sse2() local
  13067  _mm_storeu_si128((__m128i*) (o + 4), vxyzw1);                       in xnn_x32_zip_x4_ukernel__sse2()
  13150  const __m128i vxyzw1 = _mm_unpackhi_epi64(vxy_lo, vzw_lo);          in xnn_x32_zip_xm_ukernel__sse2() local
  13157  _mm_storeu_si128((__m128i*) output, vxyzw1);                        in xnn_x32_zip_xm_ukernel__sse2()
  14131  const __m128i vxyzw1 = _mm_unpackhi_epi16(vxy_lo, vzw_lo);          in xnn_x8_zip_x4_ukernel__sse2() local
  14135  _mm_storeu_si128((__m128i*) o + 1, vxyzw1);                         in xnn_x8_zip_x4_ukernel__sse2()
  14152  const __m128i vxyzw1 = _mm_unpackhi_epi16(vxy_lo, vzw_lo);          in xnn_x8_zip_x4_ukernel__sse2() local
  14157  _mm_storeu_si128((__m128i*) o + 1, vxyzw1);                         in xnn_x8_zip_x4_ukernel__sse2()
  14212  __m128i vxyzw1 = _mm_unpackhi_epi16(vxy_lo, vzw_lo);                in xnn_x8_zip_xm_ukernel__sse2() local
  14228  unaligned_store_u32(output, (uint32_t) _mm_cvtsi128_si32(vxyzw1));  in xnn_x8_zip_xm_ukernel__sse2()
  [all …]
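The amalgam entries are not new code: src/amalgam/sse2.c concatenates the per-architecture kernels into a single translation unit for single-file builds, so the same functions reappear with offset line numbers. For completeness, a caller might drive the x4 zip kernel as below; the (size_t n, const uint32_t* input, uint32_t* output) signature with n in bytes and the four streams laid out back-to-back in input is an assumption about the ukernel contract, not something the listing confirms:

#include <stdint.h>
#include <stddef.h>

/* Assumed ukernel contract (not confirmed by the listing): n is the
 * per-stream size in bytes, and the four input streams x, y, z, w are
 * stored back-to-back in input. */
extern void xnn_x32_zip_x4_ukernel__sse2(size_t n, const uint32_t* input,
                                         uint32_t* output);

/* streams holds 4 * count elements (x, y, z, w back-to-back);
 * out receives count interleaved x-y-z-w quadruples. */
void zip4(const uint32_t* streams, uint32_t* out, size_t count) {
  xnn_x32_zip_x4_ukernel__sse2(count * sizeof(uint32_t), streams, out);
}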