Home
last modified time | relevance | path

Searched refs:valphav (Results 1 – 25 of 25) sorted by relevance

/external/XNNPACK/src/f32-ibilinear-chw/gen/
Dwasmsimd-p4.c60 const v128_t valphav = wasm_v32x4_shuffle(vw0, vw1, 1, 3, 5, 7); in xnn_f32_ibilinear_chw_ukernel__wasmsimd_p4() local
76 const v128_t vl = wasm_f32x4_add(vtl, wasm_f32x4_mul(vld, valphav)); in xnn_f32_ibilinear_chw_ukernel__wasmsimd_p4()
77 const v128_t vr = wasm_f32x4_add(vtr, wasm_f32x4_mul(vrd, valphav)); in xnn_f32_ibilinear_chw_ukernel__wasmsimd_p4()
92 const v128_t valphav = wasm_v32x4_shuffle(vw, vw, 1, 3, 1, 3); in xnn_f32_ibilinear_chw_ukernel__wasmsimd_p4() local
110 const v128_t vl = wasm_f32x4_add(vtl, wasm_f32x4_mul(vld, valphav)); in xnn_f32_ibilinear_chw_ukernel__wasmsimd_p4()
111 const v128_t vr = wasm_f32x4_add(vtr, wasm_f32x4_mul(vrd, valphav)); in xnn_f32_ibilinear_chw_ukernel__wasmsimd_p4()
134 const v128_t valphav = wasm_v32x4_load_splat(w + 1); in xnn_f32_ibilinear_chw_ukernel__wasmsimd_p4() local
148 const v128_t vlr = wasm_f32x4_add(vtltr, wasm_f32x4_mul(vldrd, valphav)); in xnn_f32_ibilinear_chw_ukernel__wasmsimd_p4()
Dneonfma-p4.c59 const float32x4_t valphav = vw.val[1]; in xnn_f32_ibilinear_chw_ukernel__neonfma_p4() local
77 const float32x4_t vl = vfmaq_f32(vtl, vld, valphav); in xnn_f32_ibilinear_chw_ukernel__neonfma_p4()
78 const float32x4_t vr = vfmaq_f32(vtr, vrd, valphav); in xnn_f32_ibilinear_chw_ukernel__neonfma_p4()
93 const float32x2_t valphav = vw.val[1]; in xnn_f32_ibilinear_chw_ukernel__neonfma_p4() local
117 const float32x2_t vl = vfma_f32(vtl, vld, valphav); in xnn_f32_ibilinear_chw_ukernel__neonfma_p4()
118 const float32x2_t vr = vfma_f32(vtr, vrd, valphav); in xnn_f32_ibilinear_chw_ukernel__neonfma_p4()
141 const float32x2_t valphav = vld1_dup_f32(w + 1); in xnn_f32_ibilinear_chw_ukernel__neonfma_p4() local
155 const float32x2_t vlr = vfma_f32(vtltr, vldrd, valphav); in xnn_f32_ibilinear_chw_ukernel__neonfma_p4()
Dneon-p4.c59 const float32x4_t valphav = vw.val[1]; in xnn_f32_ibilinear_chw_ukernel__neon_p4() local
77 const float32x4_t vl = vmlaq_f32(vtl, vld, valphav); in xnn_f32_ibilinear_chw_ukernel__neon_p4()
78 const float32x4_t vr = vmlaq_f32(vtr, vrd, valphav); in xnn_f32_ibilinear_chw_ukernel__neon_p4()
93 const float32x2_t valphav = vw.val[1]; in xnn_f32_ibilinear_chw_ukernel__neon_p4() local
117 const float32x2_t vl = vmla_f32(vtl, vld, valphav); in xnn_f32_ibilinear_chw_ukernel__neon_p4()
118 const float32x2_t vr = vmla_f32(vtr, vrd, valphav); in xnn_f32_ibilinear_chw_ukernel__neon_p4()
141 const float32x2_t valphav = vld1_dup_f32(w + 1); in xnn_f32_ibilinear_chw_ukernel__neon_p4() local
155 const float32x2_t vlr = vmla_f32(vtltr, vldrd, valphav); in xnn_f32_ibilinear_chw_ukernel__neon_p4()
Dwasmsimd-p8.c146 const v128_t valphav = wasm_v32x4_shuffle(vw0, vw1, 1, 3, 5, 7); in xnn_f32_ibilinear_chw_ukernel__wasmsimd_p8() local
162 const v128_t vl = wasm_f32x4_add(vtl, wasm_f32x4_mul(vld, valphav)); in xnn_f32_ibilinear_chw_ukernel__wasmsimd_p8()
163 const v128_t vr = wasm_f32x4_add(vtr, wasm_f32x4_mul(vrd, valphav)); in xnn_f32_ibilinear_chw_ukernel__wasmsimd_p8()
178 const v128_t valphav = wasm_v32x4_shuffle(vw, vw, 1, 3, 1, 3); in xnn_f32_ibilinear_chw_ukernel__wasmsimd_p8() local
196 const v128_t vl = wasm_f32x4_add(vtl, wasm_f32x4_mul(vld, valphav)); in xnn_f32_ibilinear_chw_ukernel__wasmsimd_p8()
197 const v128_t vr = wasm_f32x4_add(vtr, wasm_f32x4_mul(vrd, valphav)); in xnn_f32_ibilinear_chw_ukernel__wasmsimd_p8()
220 const v128_t valphav = wasm_v32x4_load_splat(w + 1); in xnn_f32_ibilinear_chw_ukernel__wasmsimd_p8() local
234 const v128_t vlr = wasm_f32x4_add(vtltr, wasm_f32x4_mul(vldrd, valphav)); in xnn_f32_ibilinear_chw_ukernel__wasmsimd_p8()
Dneonfma-p8.c147 const float32x4_t valphav = vw.val[1]; in xnn_f32_ibilinear_chw_ukernel__neonfma_p8() local
165 const float32x4_t vl = vfmaq_f32(vtl, vld, valphav); in xnn_f32_ibilinear_chw_ukernel__neonfma_p8()
166 const float32x4_t vr = vfmaq_f32(vtr, vrd, valphav); in xnn_f32_ibilinear_chw_ukernel__neonfma_p8()
181 const float32x2_t valphav = vw.val[1]; in xnn_f32_ibilinear_chw_ukernel__neonfma_p8() local
205 const float32x2_t vl = vfma_f32(vtl, vld, valphav); in xnn_f32_ibilinear_chw_ukernel__neonfma_p8()
206 const float32x2_t vr = vfma_f32(vtr, vrd, valphav); in xnn_f32_ibilinear_chw_ukernel__neonfma_p8()
229 const float32x2_t valphav = vld1_dup_f32(w + 1); in xnn_f32_ibilinear_chw_ukernel__neonfma_p8() local
243 const float32x2_t vlr = vfma_f32(vtltr, vldrd, valphav); in xnn_f32_ibilinear_chw_ukernel__neonfma_p8()
Dneon-p8.c147 const float32x4_t valphav = vw.val[1]; in xnn_f32_ibilinear_chw_ukernel__neon_p8() local
165 const float32x4_t vl = vmlaq_f32(vtl, vld, valphav); in xnn_f32_ibilinear_chw_ukernel__neon_p8()
166 const float32x4_t vr = vmlaq_f32(vtr, vrd, valphav); in xnn_f32_ibilinear_chw_ukernel__neon_p8()
181 const float32x2_t valphav = vw.val[1]; in xnn_f32_ibilinear_chw_ukernel__neon_p8() local
205 const float32x2_t vl = vmla_f32(vtl, vld, valphav); in xnn_f32_ibilinear_chw_ukernel__neon_p8()
206 const float32x2_t vr = vmla_f32(vtr, vrd, valphav); in xnn_f32_ibilinear_chw_ukernel__neon_p8()
229 const float32x2_t valphav = vld1_dup_f32(w + 1); in xnn_f32_ibilinear_chw_ukernel__neon_p8() local
243 const float32x2_t vlr = vmla_f32(vtltr, vldrd, valphav); in xnn_f32_ibilinear_chw_ukernel__neon_p8()
Dscalar-p1.c40 const float valphav = w[1]; in xnn_f32_ibilinear_chw_ukernel__scalar_p1() local
56 const float vo = vt + vd * valphav; in xnn_f32_ibilinear_chw_ukernel__scalar_p1()
Dscalar-p2.c83 const float valphav = w[1]; in xnn_f32_ibilinear_chw_ukernel__scalar_p2() local
99 const float vo = vt + vd * valphav; in xnn_f32_ibilinear_chw_ukernel__scalar_p2()
Dscalar-p4.c113 const float valphav = w[1]; in xnn_f32_ibilinear_chw_ukernel__scalar_p4() local
129 const float vo = vt + vd * valphav; in xnn_f32_ibilinear_chw_ukernel__scalar_p4()
/external/XNNPACK/src/f32-ibilinear/gen/
Dscalar-c4.c36 const float valphav = weights[1]; in xnn_f32_ibilinear_ukernel__scalar_c4() local
85 const float vo0 = vt0 + vd0 * valphav; in xnn_f32_ibilinear_ukernel__scalar_c4()
86 const float vo1 = vt1 + vd1 * valphav; in xnn_f32_ibilinear_ukernel__scalar_c4()
87 const float vo2 = vt2 + vd2 * valphav; in xnn_f32_ibilinear_ukernel__scalar_c4()
88 const float vo3 = vt3 + vd3 * valphav; in xnn_f32_ibilinear_ukernel__scalar_c4()
110 const float vo = vt + vd * valphav; in xnn_f32_ibilinear_ukernel__scalar_c4()
Dscalar-c2.c36 const float valphav = weights[1]; in xnn_f32_ibilinear_ukernel__scalar_c2() local
67 const float vo0 = vt0 + vd0 * valphav; in xnn_f32_ibilinear_ukernel__scalar_c2()
68 const float vo1 = vt1 + vd1 * valphav; in xnn_f32_ibilinear_ukernel__scalar_c2()
88 const float vo = vt + vd * valphav; in xnn_f32_ibilinear_ukernel__scalar_c2()
Dwasmsimd-c8.c38 const v128_t valphav = wasm_v32x4_load_splat(weights + 1); in xnn_f32_ibilinear_ukernel__wasmsimd_c8() local
69 const v128_t vo0123 = wasm_f32x4_add(vt0123, wasm_f32x4_mul(vd0123, valphav)); in xnn_f32_ibilinear_ukernel__wasmsimd_c8()
70 const v128_t vo4567 = wasm_f32x4_add(vt4567, wasm_f32x4_mul(vd4567, valphav)); in xnn_f32_ibilinear_ukernel__wasmsimd_c8()
91 const v128_t vo = wasm_f32x4_add(vt, wasm_f32x4_mul(vd, valphav)); in xnn_f32_ibilinear_ukernel__wasmsimd_c8()
107 v128_t vo = wasm_f32x4_add(vt, wasm_f32x4_mul(vd, valphav)); in xnn_f32_ibilinear_ukernel__wasmsimd_c8()
Dsse-c8.c40 const __m128 valphav = _mm_movehl_ps(valphahv, valphahv); in xnn_f32_ibilinear_ukernel__sse_c8() local
71 const __m128 vo0123 = _mm_add_ps(vt0123, _mm_mul_ps(vd0123, valphav)); in xnn_f32_ibilinear_ukernel__sse_c8()
72 const __m128 vo4567 = _mm_add_ps(vt4567, _mm_mul_ps(vd4567, valphav)); in xnn_f32_ibilinear_ukernel__sse_c8()
96 const __m128 vo0123 = _mm_add_ps(vt0123, _mm_mul_ps(vd0123, valphav)); in xnn_f32_ibilinear_ukernel__sse_c8()
115 __m128 vo0123 = _mm_add_ps(vt0123, _mm_mul_ps(vd0123, valphav)); in xnn_f32_ibilinear_ukernel__sse_c8()
Dneonfma-c8.c41 const float32x4_t valphav = vdupq_lane_f32(valphahv, 1); in xnn_f32_ibilinear_ukernel__neonfma_c8() local
76 const float32x4_t vo0123 = vfmaq_f32(vt0123, vd0123, valphav); in xnn_f32_ibilinear_ukernel__neonfma_c8()
77 const float32x4_t vo4567 = vfmaq_f32(vt4567, vd4567, valphav); in xnn_f32_ibilinear_ukernel__neonfma_c8()
106 const float32x4_t vo0123 = vfmaq_f32(vt0123, vd0123, valphav); in xnn_f32_ibilinear_ukernel__neonfma_c8()
134 float32x4_t vo0123 = vfmaq_f32(vt0123, vd0123, valphav); in xnn_f32_ibilinear_ukernel__neonfma_c8()
Dwasmsimd-c4.c38 const v128_t valphav = wasm_v32x4_load_splat(weights + 1); in xnn_f32_ibilinear_ukernel__wasmsimd_c4() local
57 const v128_t vo = wasm_f32x4_add(vt, wasm_f32x4_mul(vd, valphav)); in xnn_f32_ibilinear_ukernel__wasmsimd_c4()
73 v128_t vo = wasm_f32x4_add(vt, wasm_f32x4_mul(vd, valphav)); in xnn_f32_ibilinear_ukernel__wasmsimd_c4()
Dsse-c4.c40 const __m128 valphav = _mm_movehl_ps(valphahv, valphahv); in xnn_f32_ibilinear_ukernel__sse_c4() local
62 const __m128 vo0123 = _mm_add_ps(vt0123, _mm_mul_ps(vd0123, valphav)); in xnn_f32_ibilinear_ukernel__sse_c4()
81 __m128 vo0123 = _mm_add_ps(vt0123, _mm_mul_ps(vd0123, valphav)); in xnn_f32_ibilinear_ukernel__sse_c4()
Dneonfma-c4.c41 const float32x4_t valphav = vdupq_lane_f32(valphahv, 1); in xnn_f32_ibilinear_ukernel__neonfma_c4() local
65 const float32x4_t vo0123 = vfmaq_f32(vt0123, vd0123, valphav); in xnn_f32_ibilinear_ukernel__neonfma_c4()
92 float32x4_t vo0123 = vfmaq_f32(vt0123, vd0123, valphav); in xnn_f32_ibilinear_ukernel__neonfma_c4()
Dscalar-c1.c36 const float valphav = weights[1]; in xnn_f32_ibilinear_ukernel__scalar_c1() local
54 const float vo = vt + vd * valphav; in xnn_f32_ibilinear_ukernel__scalar_c1()
/external/XNNPACK/src/f32-ibilinear-chw/
Dwasmsimd.c.in53 …const v128_t valphav${ABC[P:P+4]} = wasm_v32x4_shuffle(vw${ABC[P:P+4]}p0, vw${ABC[P:P+4]}p1, 1, 3,…
71 …P:P+4]} = wasm_f32x4_add(vtl${ABC[P:P+4]}, wasm_f32x4_mul(vld${ABC[P:P+4]}, valphav${ABC[P:P+4]}));
72 …P:P+4]} = wasm_f32x4_add(vtr${ABC[P:P+4]}, wasm_f32x4_mul(vrd${ABC[P:P+4]}, valphav${ABC[P:P+4]}));
102 const v128_t valphav = wasm_v32x4_shuffle(vw0, vw1, 1, 3, 5, 7); variable
117 const v128_t vl = wasm_f32x4_add(vtl, wasm_f32x4_mul(vld, valphav));
118 const v128_t vr = wasm_f32x4_add(vtr, wasm_f32x4_mul(vrd, valphav));
133 const v128_t valphav = wasm_v32x4_shuffle(vw, vw, 1, 3, 1, 3); variable
150 const v128_t vl = wasm_f32x4_add(vtl, wasm_f32x4_mul(vld, valphav));
151 const v128_t vr = wasm_f32x4_add(vtr, wasm_f32x4_mul(vrd, valphav));
174 const v128_t valphav = wasm_v32x4_load_splat(w + 1); variable
[all …]
Dneon.c.in52 const float32x4_t valphav${ABC[P:P+4]} = vw${ABC[P:P+4]}.val[1];
72 …32x4_t vl${ABC[P:P+4]} = ${VMULADDQ_F32}(vtl${ABC[P:P+4]}, vld${ABC[P:P+4]}, valphav${ABC[P:P+4]});
73 …32x4_t vr${ABC[P:P+4]} = ${VMULADDQ_F32}(vtr${ABC[P:P+4]}, vrd${ABC[P:P+4]}, valphav${ABC[P:P+4]});
100 const float32x4_t valphav = vw.val[1];
117 const float32x4_t vl = ${VMULADDQ_F32}(vtl, vld, valphav);
118 const float32x4_t vr = ${VMULADDQ_F32}(vtr, vrd, valphav);
133 const float32x2_t valphav = vw.val[1];
155 const float32x2_t vl = ${VMULADD_F32}(vtl, vld, valphav);
156 const float32x2_t vr = ${VMULADD_F32}(vtr, vrd, valphav);
179 const float32x2_t valphav = vld1_dup_f32(w + 1);
[all …]
Dscalar.c.in41 const float valphav${ABC[P]} = w[${P * 2 + 1}];
62 const float vo${ABC[P]} = vt${ABC[P]} + vd${ABC[P]} * valphav${ABC[P]};
75 const float valphav = w[1]; variable
91 const float vo = vt + vd * valphav;
102 const float valphav = w[1]; variable
118 const float vo = vt + vd * valphav;
/external/XNNPACK/src/f32-ibilinear/
Dscalar.c.in35 const float valphav = weights[1];
63 const float vo${ABC[C]} = vt${ABC[C]} + vd${ABC[C]} * valphav;
83 const float vo = vt + vd * valphav;
102 const float vo = vt + vd * valphav;
Dsse.c.in40 const __m128 valphav = _mm_movehl_ps(valphahv, valphahv);
71 … const __m128 vo${ABC[C:C+4]} = _mm_add_ps(vt${ABC[C:C+4]}, _mm_mul_ps(vd${ABC[C:C+4]}, valphav));
97 const __m128 vo0123 = _mm_add_ps(vt0123, _mm_mul_ps(vd0123, valphav));
116 __m128 vo0123 = _mm_add_ps(vt0123, _mm_mul_ps(vd0123, valphav));
Dwasmsimd.c.in38 const v128_t valphav = wasm_v32x4_load_splat(weights + 1);
70 …v128_t vo${ABC[C:C+4]} = wasm_f32x4_add(vt${ABC[C:C+4]}, wasm_f32x4_mul(vd${ABC[C:C+4]}, valphav));
92 const v128_t vo = wasm_f32x4_add(vt, wasm_f32x4_mul(vd, valphav));
108 v128_t vo = wasm_f32x4_add(vt, wasm_f32x4_mul(vd, valphav));
Dneon.c.in43 const float32x4_t valphav = vdupq_lane_f32(valphahv, 1);
79 const float32x4_t vo${ABC[C:C+4]} = vfmaq_f32(vt${ABC[C:C+4]}, vd${ABC[C:C+4]}, valphav);
117 const float32x4_t vo0123 = vfmaq_f32(vt0123, vd0123, valphav);
152 float32x4_t vo0123 = vfmaq_f32(vt0123, vd0123, valphav);