Home
last modified time | relevance | path

Searched refs:valphahv (Results 1 – 8 of 8) sorted by relevance

/external/XNNPACK/src/f32-bilinear/gen/
Dneon-c8.c38 const float32x2_t valphahv = vld1_f32(weights); weights += 2; in xnn_f32_bilinear_ukernel__neon_c8() local
56 const float32x4_t vt0123 = vmlaq_lane_f32(vtl0123, vtd0123, valphahv, 0); in xnn_f32_bilinear_ukernel__neon_c8()
57 const float32x4_t vb0123 = vmlaq_lane_f32(vbl0123, vbd0123, valphahv, 0); in xnn_f32_bilinear_ukernel__neon_c8()
58 const float32x4_t vt4567 = vmlaq_lane_f32(vtl4567, vtd4567, valphahv, 0); in xnn_f32_bilinear_ukernel__neon_c8()
59 const float32x4_t vb4567 = vmlaq_lane_f32(vbl4567, vbd4567, valphahv, 0); in xnn_f32_bilinear_ukernel__neon_c8()
64 const float32x4_t vo0123 = vmlaq_lane_f32(vt0123, vd0123, valphahv, 1); in xnn_f32_bilinear_ukernel__neon_c8()
65 const float32x4_t vo4567 = vmlaq_lane_f32(vt4567, vd4567, valphahv, 1); in xnn_f32_bilinear_ukernel__neon_c8()
79 const float32x4_t vt0123 = vmlaq_lane_f32(vtl0123, vtd0123, valphahv, 0); in xnn_f32_bilinear_ukernel__neon_c8()
80 const float32x4_t vb0123 = vmlaq_lane_f32(vbl0123, vbd0123, valphahv, 0); in xnn_f32_bilinear_ukernel__neon_c8()
84 const float32x4_t vo0123 = vmlaq_lane_f32(vt0123, vd0123, valphahv, 1); in xnn_f32_bilinear_ukernel__neon_c8()
[all …]
Dneonfma-c8.c38 const float32x2_t valphahv = vld1_f32(weights); weights += 2; in xnn_f32_bilinear_ukernel__neonfma_c8() local
40 const float32x4_t valphah = vdupq_lane_f32(valphahv, 0); in xnn_f32_bilinear_ukernel__neonfma_c8()
41 const float32x4_t valphav = vdupq_lane_f32(valphahv, 1); in xnn_f32_bilinear_ukernel__neonfma_c8()
66 const float32x4_t vt0123 = vfmaq_lane_f32(vtl0123, vtd0123, valphahv, 0); in xnn_f32_bilinear_ukernel__neonfma_c8()
67 const float32x4_t vb0123 = vfmaq_lane_f32(vbl0123, vbd0123, valphahv, 0); in xnn_f32_bilinear_ukernel__neonfma_c8()
68 const float32x4_t vt4567 = vfmaq_lane_f32(vtl4567, vtd4567, valphahv, 0); in xnn_f32_bilinear_ukernel__neonfma_c8()
69 const float32x4_t vb4567 = vfmaq_lane_f32(vbl4567, vbd4567, valphahv, 0); in xnn_f32_bilinear_ukernel__neonfma_c8()
79 const float32x4_t vo0123 = vfmaq_lane_f32(vt0123, vd0123, valphahv, 1); in xnn_f32_bilinear_ukernel__neonfma_c8()
80 const float32x4_t vo4567 = vfmaq_lane_f32(vt4567, vd4567, valphahv, 1); in xnn_f32_bilinear_ukernel__neonfma_c8()
99 const float32x4_t vt0123 = vfmaq_lane_f32(vtl0123, vtd0123, valphahv, 0); in xnn_f32_bilinear_ukernel__neonfma_c8()
[all …]
Dneonfma-c4.c38 const float32x2_t valphahv = vld1_f32(weights); weights += 2; in xnn_f32_bilinear_ukernel__neonfma_c4() local
40 const float32x4_t valphah = vdupq_lane_f32(valphahv, 0); in xnn_f32_bilinear_ukernel__neonfma_c4()
41 const float32x4_t valphav = vdupq_lane_f32(valphahv, 1); in xnn_f32_bilinear_ukernel__neonfma_c4()
58 const float32x4_t vt0123 = vfmaq_lane_f32(vtl0123, vtd0123, valphahv, 0); in xnn_f32_bilinear_ukernel__neonfma_c4()
59 const float32x4_t vb0123 = vfmaq_lane_f32(vbl0123, vbd0123, valphahv, 0); in xnn_f32_bilinear_ukernel__neonfma_c4()
67 const float32x4_t vo0123 = vfmaq_lane_f32(vt0123, vd0123, valphahv, 1); in xnn_f32_bilinear_ukernel__neonfma_c4()
85 const float32x4_t vt0123 = vfmaq_lane_f32(vtl0123, vtd0123, valphahv, 0); in xnn_f32_bilinear_ukernel__neonfma_c4()
86 const float32x4_t vb0123 = vfmaq_lane_f32(vbl0123, vbd0123, valphahv, 0); in xnn_f32_bilinear_ukernel__neonfma_c4()
94 float32x4_t vo0123 = vfmaq_lane_f32(vt0123, vd0123, valphahv, 1); in xnn_f32_bilinear_ukernel__neonfma_c4()
Dsse-c4.c37 __m128 valphahv = _mm_loadl_pi(_mm_undefined_ps(), (const __m64*) weights); in xnn_f32_bilinear_ukernel__sse_c4() local
38 valphahv = _mm_unpacklo_ps(valphahv, valphahv); in xnn_f32_bilinear_ukernel__sse_c4()
39 const __m128 valphah = _mm_movelh_ps(valphahv, valphahv); in xnn_f32_bilinear_ukernel__sse_c4()
40 const __m128 valphav = _mm_movehl_ps(valphahv, valphahv); in xnn_f32_bilinear_ukernel__sse_c4()
Dneon-c4.c38 const float32x2_t valphahv = vld1_f32(weights); weights += 2; in xnn_f32_bilinear_ukernel__neon_c4() local
50 const float32x4_t vt0123 = vmlaq_lane_f32(vtl0123, vtd0123, valphahv, 0); in xnn_f32_bilinear_ukernel__neon_c4()
51 const float32x4_t vb0123 = vmlaq_lane_f32(vbl0123, vbd0123, valphahv, 0); in xnn_f32_bilinear_ukernel__neon_c4()
55 const float32x4_t vo0123 = vmlaq_lane_f32(vt0123, vd0123, valphahv, 1); in xnn_f32_bilinear_ukernel__neon_c4()
68 const float32x4_t vt0123 = vmlaq_lane_f32(vtl0123, vtd0123, valphahv, 0); in xnn_f32_bilinear_ukernel__neon_c4()
69 const float32x4_t vb0123 = vmlaq_lane_f32(vbl0123, vbd0123, valphahv, 0); in xnn_f32_bilinear_ukernel__neon_c4()
73 const float32x4_t vo0123 = vmlaq_lane_f32(vt0123, vd0123, valphahv, 1); in xnn_f32_bilinear_ukernel__neon_c4()
Dsse-c8.c37 __m128 valphahv = _mm_loadl_pi(_mm_undefined_ps(), (const __m64*) weights); in xnn_f32_bilinear_ukernel__sse_c8() local
38 valphahv = _mm_unpacklo_ps(valphahv, valphahv); in xnn_f32_bilinear_ukernel__sse_c8()
39 const __m128 valphah = _mm_movelh_ps(valphahv, valphahv); in xnn_f32_bilinear_ukernel__sse_c8()
40 const __m128 valphav = _mm_movehl_ps(valphahv, valphahv); in xnn_f32_bilinear_ukernel__sse_c8()
/external/XNNPACK/src/f32-bilinear/
Dneon.c.in39 const float32x2_t valphahv = vld1_f32(weights); weights += 2;
42 const float32x4_t valphah = vdupq_lane_f32(valphahv, 0);
43 const float32x4_t valphav = vdupq_lane_f32(valphahv, 1);
65 …onst float32x4_t vt${ABC[C:C+4]} = vfmaq_lane_f32(vtl${ABC[C:C+4]}, vtd${ABC[C:C+4]}, valphahv, 0);
66 …onst float32x4_t vb${ABC[C:C+4]} = vfmaq_lane_f32(vbl${ABC[C:C+4]}, vbd${ABC[C:C+4]}, valphahv, 0);
70 …onst float32x4_t vt${ABC[C:C+4]} = vmlaq_lane_f32(vtl${ABC[C:C+4]}, vtd${ABC[C:C+4]}, valphahv, 0);
71 …onst float32x4_t vb${ABC[C:C+4]} = vmlaq_lane_f32(vbl${ABC[C:C+4]}, vbd${ABC[C:C+4]}, valphahv, 0);
82 … const float32x4_t vo${ABC[C:C+4]} = vfmaq_lane_f32(vt${ABC[C:C+4]}, vd${ABC[C:C+4]}, valphahv, 1);
86 … const float32x4_t vo${ABC[C:C+4]} = vmlaq_lane_f32(vt${ABC[C:C+4]}, vd${ABC[C:C+4]}, valphahv, 1);
106 const float32x4_t vt0123 = vfmaq_lane_f32(vtl0123, vtd0123, valphahv, 0);
[all …]
Dsse.c.in37 __m128 valphahv = _mm_loadl_pi(_mm_undefined_ps(), (const __m64*) weights);
38 valphahv = _mm_unpacklo_ps(valphahv, valphahv);
39 const __m128 valphah = _mm_movelh_ps(valphahv, valphahv);
40 const __m128 valphav = _mm_movehl_ps(valphahv, valphahv);