/external/XNNPACK/src/f32-ibilinear-chw/gen/ |
D | wasmsimd-p8.c | 90 const v128_t vldrd01 = wasm_f32x4_sub(vblbr01, vtltr01); in xnn_f32_ibilinear_chw_ukernel__wasmsimd_p8() local 95 const v128_t vld0123 = wasm_v32x4_shuffle(vldrd01, vldrd23, 0, 2, 4, 6); in xnn_f32_ibilinear_chw_ukernel__wasmsimd_p8() 96 const v128_t vrd0123 = wasm_v32x4_shuffle(vldrd01, vldrd23, 1, 3, 5, 7); in xnn_f32_ibilinear_chw_ukernel__wasmsimd_p8() 153 const v128_t vldrd01 = wasm_f32x4_sub(vblbr01, vtltr01); in xnn_f32_ibilinear_chw_ukernel__wasmsimd_p8() local 156 const v128_t vld = wasm_v32x4_shuffle(vldrd01, vldrd23, 0, 2, 4, 6); in xnn_f32_ibilinear_chw_ukernel__wasmsimd_p8() 157 const v128_t vrd = wasm_v32x4_shuffle(vldrd01, vldrd23, 1, 3, 5, 7); in xnn_f32_ibilinear_chw_ukernel__wasmsimd_p8()
|
D | sse-p8.c | 82 const __m128 vldrd01 = _mm_sub_ps(vblbr01, vtltr01); in xnn_f32_ibilinear_chw_ukernel__sse_p8() local 87 const __m128 vld0123 = _mm_shuffle_ps(vldrd01, vldrd23, _MM_SHUFFLE(2, 0, 2, 0)); in xnn_f32_ibilinear_chw_ukernel__sse_p8() 88 const __m128 vrd0123 = _mm_shuffle_ps(vldrd01, vldrd23, _MM_SHUFFLE(3, 1, 3, 1)); in xnn_f32_ibilinear_chw_ukernel__sse_p8() 141 const __m128 vldrd01 = _mm_sub_ps(vblbr01, vtltr01); in xnn_f32_ibilinear_chw_ukernel__sse_p8() local 144 const __m128 vld = _mm_shuffle_ps(vldrd01, vldrd23, _MM_SHUFFLE(2, 0, 2, 0)); in xnn_f32_ibilinear_chw_ukernel__sse_p8() 145 const __m128 vrd = _mm_shuffle_ps(vldrd01, vldrd23, _MM_SHUFFLE(3, 1, 3, 1)); in xnn_f32_ibilinear_chw_ukernel__sse_p8()
|
D | wasmsimd-p4.c | 67 const v128_t vldrd01 = wasm_f32x4_sub(vblbr01, vtltr01); in xnn_f32_ibilinear_chw_ukernel__wasmsimd_p4() local 70 const v128_t vld = wasm_v32x4_shuffle(vldrd01, vldrd23, 0, 2, 4, 6); in xnn_f32_ibilinear_chw_ukernel__wasmsimd_p4() 71 const v128_t vrd = wasm_v32x4_shuffle(vldrd01, vldrd23, 1, 3, 5, 7); in xnn_f32_ibilinear_chw_ukernel__wasmsimd_p4()
|
D | sse-p4.c | 63 const __m128 vldrd01 = _mm_sub_ps(vblbr01, vtltr01); in xnn_f32_ibilinear_chw_ukernel__sse_p4() local 66 const __m128 vld = _mm_shuffle_ps(vldrd01, vldrd23, _MM_SHUFFLE(2, 0, 2, 0)); in xnn_f32_ibilinear_chw_ukernel__sse_p4() 67 const __m128 vrd = _mm_shuffle_ps(vldrd01, vldrd23, _MM_SHUFFLE(3, 1, 3, 1)); in xnn_f32_ibilinear_chw_ukernel__sse_p4()
|
D | neonfma-p8.c | 88 const float32x4_t vldrd01 = vsubq_f32(vblbr01, vtltr01); in xnn_f32_ibilinear_chw_ukernel__neonfma_p8() local 93 const float32x4x2_t vld_t0123 = vuzpq_f32(vldrd01, vldrd23); in xnn_f32_ibilinear_chw_ukernel__neonfma_p8() 154 const float32x4_t vldrd01 = vsubq_f32(vblbr01, vtltr01); in xnn_f32_ibilinear_chw_ukernel__neonfma_p8() local 157 const float32x4x2_t vld_t = vuzpq_f32(vldrd01, vldrd23); in xnn_f32_ibilinear_chw_ukernel__neonfma_p8()
|
D | neon-p8.c | 88 const float32x4_t vldrd01 = vsubq_f32(vblbr01, vtltr01); in xnn_f32_ibilinear_chw_ukernel__neon_p8() local 93 const float32x4x2_t vld_t0123 = vuzpq_f32(vldrd01, vldrd23); in xnn_f32_ibilinear_chw_ukernel__neon_p8() 154 const float32x4_t vldrd01 = vsubq_f32(vblbr01, vtltr01); in xnn_f32_ibilinear_chw_ukernel__neon_p8() local 157 const float32x4x2_t vld_t = vuzpq_f32(vldrd01, vldrd23); in xnn_f32_ibilinear_chw_ukernel__neon_p8()
|
D | neonfma-p4.c | 66 const float32x4_t vldrd01 = vsubq_f32(vblbr01, vtltr01); in xnn_f32_ibilinear_chw_ukernel__neonfma_p4() local 69 const float32x4x2_t vld_t = vuzpq_f32(vldrd01, vldrd23); in xnn_f32_ibilinear_chw_ukernel__neonfma_p4()
|
D | neon-p4.c | 66 const float32x4_t vldrd01 = vsubq_f32(vblbr01, vtltr01); in xnn_f32_ibilinear_chw_ukernel__neon_p4() local 69 const float32x4x2_t vld_t = vuzpq_f32(vldrd01, vldrd23); in xnn_f32_ibilinear_chw_ukernel__neon_p4()
|
D | neonfma-p16.c | 134 const float32x4_t vldrd01 = vsubq_f32(vblbr01, vtltr01); in xnn_f32_ibilinear_chw_ukernel__neonfma_p16() local 143 const float32x4x2_t vld_t0123 = vuzpq_f32(vldrd01, vldrd23); in xnn_f32_ibilinear_chw_ukernel__neonfma_p16() 226 const float32x4_t vldrd01 = vsubq_f32(vblbr01, vtltr01); in xnn_f32_ibilinear_chw_ukernel__neonfma_p16() local 229 const float32x4x2_t vld_t = vuzpq_f32(vldrd01, vldrd23); in xnn_f32_ibilinear_chw_ukernel__neonfma_p16()
|
D | neon-p16.c | 134 const float32x4_t vldrd01 = vsubq_f32(vblbr01, vtltr01); in xnn_f32_ibilinear_chw_ukernel__neon_p16() local 143 const float32x4x2_t vld_t0123 = vuzpq_f32(vldrd01, vldrd23); in xnn_f32_ibilinear_chw_ukernel__neon_p16() 226 const float32x4_t vldrd01 = vsubq_f32(vblbr01, vtltr01); in xnn_f32_ibilinear_chw_ukernel__neon_p16() local 229 const float32x4x2_t vld_t = vuzpq_f32(vldrd01, vldrd23); in xnn_f32_ibilinear_chw_ukernel__neon_p16()
|
/external/XNNPACK/src/f32-ibilinear-chw/ |
D | wasmsimd.c.in | 111 const v128_t vld = wasm_v32x4_shuffle(vldrd01, vldrd23, 0, 2, 4, 6); 112 const v128_t vrd = wasm_v32x4_shuffle(vldrd01, vldrd23, 1, 3, 5, 7);
|
D | sse.c.in | 107 const __m128 vld = _mm_shuffle_ps(vldrd01, vldrd23, _MM_SHUFFLE(2, 0, 2, 0)); 108 const __m128 vrd = _mm_shuffle_ps(vldrd01, vldrd23, _MM_SHUFFLE(3, 1, 3, 1));
|
D | neon.c.in | 109 const float32x4x2_t vld_t = vuzpq_f32(vldrd01, vldrd23);
|
/external/XNNPACK/src/amalgam/ |
D | sse.c | 5041 const __m128 vldrd01 = _mm_sub_ps(vblbr01, vtltr01); in xnn_f32_ibilinear_chw_ukernel__sse_p8() local 5046 const __m128 vld0123 = _mm_shuffle_ps(vldrd01, vldrd23, _MM_SHUFFLE(2, 0, 2, 0)); in xnn_f32_ibilinear_chw_ukernel__sse_p8() 5047 const __m128 vrd0123 = _mm_shuffle_ps(vldrd01, vldrd23, _MM_SHUFFLE(3, 1, 3, 1)); in xnn_f32_ibilinear_chw_ukernel__sse_p8() 5100 const __m128 vldrd01 = _mm_sub_ps(vblbr01, vtltr01); in xnn_f32_ibilinear_chw_ukernel__sse_p8() local 5103 const __m128 vld = _mm_shuffle_ps(vldrd01, vldrd23, _MM_SHUFFLE(2, 0, 2, 0)); in xnn_f32_ibilinear_chw_ukernel__sse_p8() 5104 const __m128 vrd = _mm_shuffle_ps(vldrd01, vldrd23, _MM_SHUFFLE(3, 1, 3, 1)); in xnn_f32_ibilinear_chw_ukernel__sse_p8()
|