/external/XNNPACK/src/f32-ibilinear/gen/ |
D | sse-c4.c | 62 const __m128 vo0123 = _mm_add_ps(vt0123, _mm_mul_ps(vd0123, valphav)); in xnn_f32_ibilinear_ukernel__sse_c4() local 64 _mm_storeu_ps(output, vo0123); in xnn_f32_ibilinear_ukernel__sse_c4() 81 __m128 vo0123 = _mm_add_ps(vt0123, _mm_mul_ps(vd0123, valphav)); in xnn_f32_ibilinear_ukernel__sse_c4() local 84 _mm_storel_pi((__m64*) output, vo0123); in xnn_f32_ibilinear_ukernel__sse_c4() 85 vo0123 = _mm_movehl_ps(vo0123, vo0123); in xnn_f32_ibilinear_ukernel__sse_c4() 89 _mm_store_ss(output, vo0123); in xnn_f32_ibilinear_ukernel__sse_c4()
|
D | sse-c8.c | 71 const __m128 vo0123 = _mm_add_ps(vt0123, _mm_mul_ps(vd0123, valphav)); in xnn_f32_ibilinear_ukernel__sse_c8() local 74 _mm_storeu_ps(output, vo0123); in xnn_f32_ibilinear_ukernel__sse_c8() 96 const __m128 vo0123 = _mm_add_ps(vt0123, _mm_mul_ps(vd0123, valphav)); in xnn_f32_ibilinear_ukernel__sse_c8() local 98 _mm_storeu_ps(output, vo0123); in xnn_f32_ibilinear_ukernel__sse_c8() 115 __m128 vo0123 = _mm_add_ps(vt0123, _mm_mul_ps(vd0123, valphav)); in xnn_f32_ibilinear_ukernel__sse_c8() local 118 _mm_storel_pi((__m64*) output, vo0123); in xnn_f32_ibilinear_ukernel__sse_c8() 119 vo0123 = _mm_movehl_ps(vo0123, vo0123); in xnn_f32_ibilinear_ukernel__sse_c8() 123 _mm_store_ss(output, vo0123); in xnn_f32_ibilinear_ukernel__sse_c8()
|
D | neonfma-c8.c | 76 const float32x4_t vo0123 = vfmaq_f32(vt0123, vd0123, valphav); in xnn_f32_ibilinear_ukernel__neonfma_c8() local 79 const float32x4_t vo0123 = vfmaq_lane_f32(vt0123, vd0123, valphahv, 1); in xnn_f32_ibilinear_ukernel__neonfma_c8() local 83 vst1q_f32(output, vo0123); output += 4; in xnn_f32_ibilinear_ukernel__neonfma_c8() 106 const float32x4_t vo0123 = vfmaq_f32(vt0123, vd0123, valphav); in xnn_f32_ibilinear_ukernel__neonfma_c8() local 108 const float32x4_t vo0123 = vfmaq_lane_f32(vt0123, vd0123, valphahv, 1); in xnn_f32_ibilinear_ukernel__neonfma_c8() local 111 vst1q_f32(output, vo0123); in xnn_f32_ibilinear_ukernel__neonfma_c8() 134 float32x4_t vo0123 = vfmaq_f32(vt0123, vd0123, valphav); in xnn_f32_ibilinear_ukernel__neonfma_c8() local 136 float32x4_t vo0123 = vfmaq_lane_f32(vt0123, vd0123, valphahv, 1); in xnn_f32_ibilinear_ukernel__neonfma_c8() local 139 float32x2_t vo01 = vget_low_f32(vo0123); in xnn_f32_ibilinear_ukernel__neonfma_c8() 142 vo01 = vget_high_f32(vo0123); in xnn_f32_ibilinear_ukernel__neonfma_c8()
|
D | neonfma-c4.c | 65 const float32x4_t vo0123 = vfmaq_f32(vt0123, vd0123, valphav); in xnn_f32_ibilinear_ukernel__neonfma_c4() local 67 const float32x4_t vo0123 = vfmaq_lane_f32(vt0123, vd0123, valphahv, 1); in xnn_f32_ibilinear_ukernel__neonfma_c4() local 70 vst1q_f32(output, vo0123); output += 4; in xnn_f32_ibilinear_ukernel__neonfma_c4() 92 float32x4_t vo0123 = vfmaq_f32(vt0123, vd0123, valphav); in xnn_f32_ibilinear_ukernel__neonfma_c4() local 94 float32x4_t vo0123 = vfmaq_lane_f32(vt0123, vd0123, valphahv, 1); in xnn_f32_ibilinear_ukernel__neonfma_c4() local 97 float32x2_t vo01 = vget_low_f32(vo0123); in xnn_f32_ibilinear_ukernel__neonfma_c4() 100 vo01 = vget_high_f32(vo0123); in xnn_f32_ibilinear_ukernel__neonfma_c4()
|
D | neon-c8.c | 64 const float32x4_t vo0123 = vmlaq_lane_f32(vt0123, vd0123, valphahv, 1); in xnn_f32_ibilinear_ukernel__neon_c8() local 67 vst1q_f32(output, vo0123); output += 4; in xnn_f32_ibilinear_ukernel__neon_c8() 84 const float32x4_t vo0123 = vmlaq_lane_f32(vt0123, vd0123, valphahv, 1); in xnn_f32_ibilinear_ukernel__neon_c8() local 86 vst1q_f32(output, vo0123); in xnn_f32_ibilinear_ukernel__neon_c8() 103 const float32x4_t vo0123 = vmlaq_lane_f32(vt0123, vd0123, valphahv, 1); in xnn_f32_ibilinear_ukernel__neon_c8() local 105 float32x2_t vo01 = vget_low_f32(vo0123); in xnn_f32_ibilinear_ukernel__neon_c8() 108 vo01 = vget_high_f32(vo0123); in xnn_f32_ibilinear_ukernel__neon_c8()
|
D | neon-c4.c | 55 const float32x4_t vo0123 = vmlaq_lane_f32(vt0123, vd0123, valphahv, 1); in xnn_f32_ibilinear_ukernel__neon_c4() local 57 vst1q_f32(output, vo0123); output += 4; in xnn_f32_ibilinear_ukernel__neon_c4() 73 const float32x4_t vo0123 = vmlaq_lane_f32(vt0123, vd0123, valphahv, 1); in xnn_f32_ibilinear_ukernel__neon_c4() local 75 float32x2_t vo01 = vget_low_f32(vo0123); in xnn_f32_ibilinear_ukernel__neon_c4() 78 vo01 = vget_high_f32(vo0123); in xnn_f32_ibilinear_ukernel__neon_c4()
|
D | wasmsimd-c8.c | 69 const v128_t vo0123 = wasm_f32x4_add(vt0123, wasm_f32x4_mul(vd0123, valphav)); in xnn_f32_ibilinear_ukernel__wasmsimd_c8() local 72 wasm_v128_store(output, vo0123); in xnn_f32_ibilinear_ukernel__wasmsimd_c8()
|
/external/XNNPACK/src/f32-ibilinear/ |
D | sse.c.in | 97 const __m128 vo0123 = _mm_add_ps(vt0123, _mm_mul_ps(vd0123, valphav)); 99 _mm_storeu_ps(output, vo0123); 116 __m128 vo0123 = _mm_add_ps(vt0123, _mm_mul_ps(vd0123, valphav)); 119 _mm_storel_pi((__m64*) output, vo0123); 120 vo0123 = _mm_movehl_ps(vo0123, vo0123); 124 _mm_store_ss(output, vo0123);
|
D | neon.c.in | 117 const float32x4_t vo0123 = vfmaq_f32(vt0123, vd0123, valphav); 119 const float32x4_t vo0123 = vfmaq_lane_f32(vt0123, vd0123, valphahv, 1); 122 const float32x4_t vo0123 = vmlaq_lane_f32(vt0123, vd0123, valphahv, 1); 124 vst1q_f32(output, vo0123); 152 float32x4_t vo0123 = vfmaq_f32(vt0123, vd0123, valphav); 154 float32x4_t vo0123 = vfmaq_lane_f32(vt0123, vd0123, valphahv, 1); 157 const float32x4_t vo0123 = vmlaq_lane_f32(vt0123, vd0123, valphahv, 1); 159 float32x2_t vo01 = vget_low_f32(vo0123); 162 vo01 = vget_high_f32(vo0123);
|
/external/XNNPACK/src/f32-ibilinear-chw/gen/ |
D | wasmsimd-p8.c | 113 const v128_t vo0123 = wasm_f32x4_add(vl0123, wasm_f32x4_mul(vd0123, valphah0123)); in xnn_f32_ibilinear_chw_ukernel__wasmsimd_p8() local 116 wasm_v128_store(output + 0, vo0123); in xnn_f32_ibilinear_chw_ukernel__wasmsimd_p8()
|
D | neonfma-p8.c | 115 const float32x4_t vo0123 = vfmaq_f32(vl0123, vd0123, valphah0123); in xnn_f32_ibilinear_chw_ukernel__neonfma_p8() local 118 vst1q_f32(output + 0, vo0123); in xnn_f32_ibilinear_chw_ukernel__neonfma_p8()
|
D | neon-p8.c | 115 const float32x4_t vo0123 = vmlaq_f32(vl0123, vd0123, valphah0123); in xnn_f32_ibilinear_chw_ukernel__neon_p8() local 118 vst1q_f32(output + 0, vo0123); in xnn_f32_ibilinear_chw_ukernel__neon_p8()
|