Home
last modified time | relevance | path

Searched refs:_mm_sub_ps (Results 1 – 25 of 56) sorted by relevance

123

/external/XNNPACK/src/f32-bilinear/gen/
Dsse-c8.c58 const __m128 vtd0123 = _mm_sub_ps(vtr0123, vtl0123); in xnn_f32_bilinear_ukernel__sse_c8()
59 const __m128 vbd0123 = _mm_sub_ps(vbr0123, vbl0123); in xnn_f32_bilinear_ukernel__sse_c8()
60 const __m128 vtd4567 = _mm_sub_ps(vtr4567, vtl4567); in xnn_f32_bilinear_ukernel__sse_c8()
61 const __m128 vbd4567 = _mm_sub_ps(vbr4567, vbl4567); in xnn_f32_bilinear_ukernel__sse_c8()
68 const __m128 vd0123 = _mm_sub_ps(vb0123, vt0123); in xnn_f32_bilinear_ukernel__sse_c8()
69 const __m128 vd4567 = _mm_sub_ps(vb4567, vt4567); in xnn_f32_bilinear_ukernel__sse_c8()
88 const __m128 vtd0123 = _mm_sub_ps(vtr0123, vtl0123); in xnn_f32_bilinear_ukernel__sse_c8()
89 const __m128 vbd0123 = _mm_sub_ps(vbr0123, vbl0123); in xnn_f32_bilinear_ukernel__sse_c8()
94 const __m128 vd0123 = _mm_sub_ps(vb0123, vt0123); in xnn_f32_bilinear_ukernel__sse_c8()
107 const __m128 vtd0123 = _mm_sub_ps(vtr0123, vtl0123); in xnn_f32_bilinear_ukernel__sse_c8()
[all …]
Dsse-c4.c54 const __m128 vtd0123 = _mm_sub_ps(vtr0123, vtl0123); in xnn_f32_bilinear_ukernel__sse_c4()
55 const __m128 vbd0123 = _mm_sub_ps(vbr0123, vbl0123); in xnn_f32_bilinear_ukernel__sse_c4()
60 const __m128 vd0123 = _mm_sub_ps(vb0123, vt0123); in xnn_f32_bilinear_ukernel__sse_c4()
73 const __m128 vtd0123 = _mm_sub_ps(vtr0123, vtl0123); in xnn_f32_bilinear_ukernel__sse_c4()
74 const __m128 vbd0123 = _mm_sub_ps(vbr0123, vbl0123); in xnn_f32_bilinear_ukernel__sse_c4()
79 const __m128 vd0123 = _mm_sub_ps(vb0123, vt0123); in xnn_f32_bilinear_ukernel__sse_c4()
/external/webrtc/webrtc/modules/audio_processing/aec/
Daec_rdft_sse2.c39 const __m128 x1v = _mm_sub_ps(a01v, a23v); in cft1st_128_SSE2()
41 const __m128 x3v = _mm_sub_ps(a45v, a67v); in cft1st_128_SSE2()
44 x0v = _mm_sub_ps(x0v, x2v); in cft1st_128_SSE2()
61 x0v = _mm_sub_ps(x1v, x3s); in cft1st_128_SSE2()
99 const __m128 x1r0_1i0_1r1_x1i1 = _mm_sub_ps(a_00_32, a_08_40); in cftmdl_128_SSE2()
112 const __m128 x3r0_3i0_3r1_x3i1 = _mm_sub_ps(a_16_48, a_24_56); in cftmdl_128_SSE2()
115 const __m128 xx1 = _mm_sub_ps(x0r0_0i0_0r1_x0i1, x2r0_2i0_2r1_x2i1); in cftmdl_128_SSE2()
121 const __m128 x1_x3_sub = _mm_sub_ps(x1r0_1i0_1r1_x1i1, x3_swapped); in cftmdl_128_SSE2()
173 const __m128 x1r0_1i0_1r1_x1i1 = _mm_sub_ps(a_00_32, a_08_40); in cftmdl_128_SSE2()
186 const __m128 x3r0_3i0_3r1_x3i1 = _mm_sub_ps(a_16_48, a_24_56); in cftmdl_128_SSE2()
[all …]
/external/XNNPACK/src/f32-raddstoreexpminusmax/gen/
Dsse2-p5-x20.c54 const __m128 vx0123 = _mm_sub_ps(vi0123, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20()
55 const __m128 vx4567 = _mm_sub_ps(vi4567, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20()
56 const __m128 vx89AB = _mm_sub_ps(vi89AB, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20()
57 const __m128 vxCDEF = _mm_sub_ps(viCDEF, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20()
58 const __m128 vxGHIJ = _mm_sub_ps(viGHIJ, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20()
76 vn0123 = _mm_sub_ps(vn0123, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20()
77 vn4567 = _mm_sub_ps(vn4567, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20()
78 vn89AB = _mm_sub_ps(vn89AB, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20()
79 vnCDEF = _mm_sub_ps(vnCDEF, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20()
80 vnGHIJ = _mm_sub_ps(vnGHIJ, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20()
[all …]
Dsse2-p5-x20-acc2.c55 const __m128 vx0123 = _mm_sub_ps(vi0123, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20_acc2()
56 const __m128 vx4567 = _mm_sub_ps(vi4567, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20_acc2()
57 const __m128 vx89AB = _mm_sub_ps(vi89AB, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20_acc2()
58 const __m128 vxCDEF = _mm_sub_ps(viCDEF, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20_acc2()
59 const __m128 vxGHIJ = _mm_sub_ps(viGHIJ, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20_acc2()
77 vn0123 = _mm_sub_ps(vn0123, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20_acc2()
78 vn4567 = _mm_sub_ps(vn4567, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20_acc2()
79 vn89AB = _mm_sub_ps(vn89AB, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20_acc2()
80 vnCDEF = _mm_sub_ps(vnCDEF, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20_acc2()
81 vnGHIJ = _mm_sub_ps(vnGHIJ, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20_acc2()
[all …]
Dsse2-p5-x20-acc5.c58 const __m128 vx0123 = _mm_sub_ps(vi0123, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20_acc5()
59 const __m128 vx4567 = _mm_sub_ps(vi4567, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20_acc5()
60 const __m128 vx89AB = _mm_sub_ps(vi89AB, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20_acc5()
61 const __m128 vxCDEF = _mm_sub_ps(viCDEF, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20_acc5()
62 const __m128 vxGHIJ = _mm_sub_ps(viGHIJ, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20_acc5()
80 vn0123 = _mm_sub_ps(vn0123, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20_acc5()
81 vn4567 = _mm_sub_ps(vn4567, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20_acc5()
82 vn89AB = _mm_sub_ps(vn89AB, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20_acc5()
83 vnCDEF = _mm_sub_ps(vnCDEF, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20_acc5()
84 vnGHIJ = _mm_sub_ps(vnGHIJ, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20_acc5()
[all …]
Dsse2-p5-x16.c53 const __m128 vx0123 = _mm_sub_ps(vi0123, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16()
54 const __m128 vx4567 = _mm_sub_ps(vi4567, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16()
55 const __m128 vx89AB = _mm_sub_ps(vi89AB, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16()
56 const __m128 vxCDEF = _mm_sub_ps(viCDEF, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16()
72 vn0123 = _mm_sub_ps(vn0123, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16()
73 vn4567 = _mm_sub_ps(vn4567, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16()
74 vn89AB = _mm_sub_ps(vn89AB, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16()
75 vnCDEF = _mm_sub_ps(vnCDEF, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16()
152 const __m128 vx = _mm_sub_ps(vi, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16()
162 vn = _mm_sub_ps(vn, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16()
[all …]
Dsse2-p5-x16-acc4.c56 const __m128 vx0123 = _mm_sub_ps(vi0123, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16_acc4()
57 const __m128 vx4567 = _mm_sub_ps(vi4567, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16_acc4()
58 const __m128 vx89AB = _mm_sub_ps(vi89AB, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16_acc4()
59 const __m128 vxCDEF = _mm_sub_ps(viCDEF, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16_acc4()
75 vn0123 = _mm_sub_ps(vn0123, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16_acc4()
76 vn4567 = _mm_sub_ps(vn4567, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16_acc4()
77 vn89AB = _mm_sub_ps(vn89AB, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16_acc4()
78 vnCDEF = _mm_sub_ps(vnCDEF, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16_acc4()
159 const __m128 vx = _mm_sub_ps(vi, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16_acc4()
169 vn = _mm_sub_ps(vn, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16_acc4()
[all …]
Dsse2-p5-x16-acc2.c54 const __m128 vx0123 = _mm_sub_ps(vi0123, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16_acc2()
55 const __m128 vx4567 = _mm_sub_ps(vi4567, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16_acc2()
56 const __m128 vx89AB = _mm_sub_ps(vi89AB, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16_acc2()
57 const __m128 vxCDEF = _mm_sub_ps(viCDEF, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16_acc2()
73 vn0123 = _mm_sub_ps(vn0123, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16_acc2()
74 vn4567 = _mm_sub_ps(vn4567, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16_acc2()
75 vn89AB = _mm_sub_ps(vn89AB, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16_acc2()
76 vnCDEF = _mm_sub_ps(vnCDEF, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16_acc2()
155 const __m128 vx = _mm_sub_ps(vi, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16_acc2()
165 vn = _mm_sub_ps(vn, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16_acc2()
[all …]
Dsse2-p5-x12.c52 const __m128 vx0123 = _mm_sub_ps(vi0123, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12()
53 const __m128 vx4567 = _mm_sub_ps(vi4567, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12()
54 const __m128 vx89AB = _mm_sub_ps(vi89AB, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12()
68 vn0123 = _mm_sub_ps(vn0123, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12()
69 vn4567 = _mm_sub_ps(vn4567, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12()
70 vn89AB = _mm_sub_ps(vn89AB, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12()
136 const __m128 vx = _mm_sub_ps(vi, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12()
146 vn = _mm_sub_ps(vn, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12()
184 const __m128 vx = _mm_sub_ps(vi, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12()
194 vn = _mm_sub_ps(vn, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12()
Dsse2-p5-x12-acc3.c54 const __m128 vx0123 = _mm_sub_ps(vi0123, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12_acc3()
55 const __m128 vx4567 = _mm_sub_ps(vi4567, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12_acc3()
56 const __m128 vx89AB = _mm_sub_ps(vi89AB, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12_acc3()
70 vn0123 = _mm_sub_ps(vn0123, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12_acc3()
71 vn4567 = _mm_sub_ps(vn4567, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12_acc3()
72 vn89AB = _mm_sub_ps(vn89AB, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12_acc3()
141 const __m128 vx = _mm_sub_ps(vi, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12_acc3()
151 vn = _mm_sub_ps(vn, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12_acc3()
189 const __m128 vx = _mm_sub_ps(vi, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12_acc3()
199 vn = _mm_sub_ps(vn, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12_acc3()
Dsse2-p5-x12-acc2.c53 const __m128 vx0123 = _mm_sub_ps(vi0123, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12_acc2()
54 const __m128 vx4567 = _mm_sub_ps(vi4567, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12_acc2()
55 const __m128 vx89AB = _mm_sub_ps(vi89AB, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12_acc2()
69 vn0123 = _mm_sub_ps(vn0123, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12_acc2()
70 vn4567 = _mm_sub_ps(vn4567, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12_acc2()
71 vn89AB = _mm_sub_ps(vn89AB, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12_acc2()
139 const __m128 vx = _mm_sub_ps(vi, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12_acc2()
149 vn = _mm_sub_ps(vn, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12_acc2()
187 const __m128 vx = _mm_sub_ps(vi, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12_acc2()
197 vn = _mm_sub_ps(vn, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12_acc2()
Dsse2-p5-x8.c51 const __m128 vx0123 = _mm_sub_ps(vi0123, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8()
52 const __m128 vx4567 = _mm_sub_ps(vi4567, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8()
64 vn0123 = _mm_sub_ps(vn0123, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8()
65 vn4567 = _mm_sub_ps(vn4567, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8()
120 const __m128 vx = _mm_sub_ps(vi, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8()
130 vn = _mm_sub_ps(vn, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8()
168 const __m128 vx = _mm_sub_ps(vi, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8()
178 vn = _mm_sub_ps(vn, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8()
Dsse2-p5-x8-acc2.c52 const __m128 vx0123 = _mm_sub_ps(vi0123, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8_acc2()
53 const __m128 vx4567 = _mm_sub_ps(vi4567, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8_acc2()
65 vn0123 = _mm_sub_ps(vn0123, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8_acc2()
66 vn4567 = _mm_sub_ps(vn4567, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8_acc2()
123 const __m128 vx = _mm_sub_ps(vi, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8_acc2()
133 vn = _mm_sub_ps(vn, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8_acc2()
171 const __m128 vx = _mm_sub_ps(vi, vi_max); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8_acc2()
181 vn = _mm_sub_ps(vn, vmagic_bias); in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8_acc2()
/external/XNNPACK/src/f32-sigmoid/gen/
Dsse41-p5-div-x24.c88 vn0123 = _mm_sub_ps(vn0123, vmagic_bias); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x24()
89 vn4567 = _mm_sub_ps(vn4567, vmagic_bias); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x24()
90 vn89AB = _mm_sub_ps(vn89AB, vmagic_bias); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x24()
91 vnCDEF = _mm_sub_ps(vnCDEF, vmagic_bias); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x24()
92 vnGHIJ = _mm_sub_ps(vnGHIJ, vmagic_bias); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x24()
93 vnKLMN = _mm_sub_ps(vnKLMN, vmagic_bias); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x24()
184 vf0123 = _mm_blendv_ps(_mm_sub_ps(vone, vf0123), vf0123, vx0123); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x24()
185 vf4567 = _mm_blendv_ps(_mm_sub_ps(vone, vf4567), vf4567, vx4567); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x24()
186 vf89AB = _mm_blendv_ps(_mm_sub_ps(vone, vf89AB), vf89AB, vx89AB); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x24()
187 vfCDEF = _mm_blendv_ps(_mm_sub_ps(vone, vfCDEF), vfCDEF, vxCDEF); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x24()
[all …]
Dsse41-p5-div-x20.c84 vn0123 = _mm_sub_ps(vn0123, vmagic_bias); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x20()
85 vn4567 = _mm_sub_ps(vn4567, vmagic_bias); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x20()
86 vn89AB = _mm_sub_ps(vn89AB, vmagic_bias); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x20()
87 vnCDEF = _mm_sub_ps(vnCDEF, vmagic_bias); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x20()
88 vnGHIJ = _mm_sub_ps(vnGHIJ, vmagic_bias); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x20()
168 vf0123 = _mm_blendv_ps(_mm_sub_ps(vone, vf0123), vf0123, vx0123); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x20()
169 vf4567 = _mm_blendv_ps(_mm_sub_ps(vone, vf4567), vf4567, vx4567); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x20()
170 vf89AB = _mm_blendv_ps(_mm_sub_ps(vone, vf89AB), vf89AB, vx89AB); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x20()
171 vfCDEF = _mm_blendv_ps(_mm_sub_ps(vone, vfCDEF), vfCDEF, vxCDEF); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x20()
172 vfGHIJ = _mm_blendv_ps(_mm_sub_ps(vone, vfGHIJ), vfGHIJ, vxGHIJ); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x20()
[all …]
Dsse2-p5-div-x24.c88 vn0123 = _mm_sub_ps(vn0123, vmagic_bias); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x24()
89 vn4567 = _mm_sub_ps(vn4567, vmagic_bias); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x24()
90 vn89AB = _mm_sub_ps(vn89AB, vmagic_bias); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x24()
91 vnCDEF = _mm_sub_ps(vnCDEF, vmagic_bias); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x24()
92 vnGHIJ = _mm_sub_ps(vnGHIJ, vmagic_bias); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x24()
93 vnKLMN = _mm_sub_ps(vnKLMN, vmagic_bias); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x24()
191 vf0123 = _mm_or_ps(_mm_and_ps(vf0123, vm0123), _mm_andnot_ps(vm0123, _mm_sub_ps(vone, vf0123))); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x24()
192 vf4567 = _mm_or_ps(_mm_and_ps(vf4567, vm4567), _mm_andnot_ps(vm4567, _mm_sub_ps(vone, vf4567))); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x24()
193 vf89AB = _mm_or_ps(_mm_and_ps(vf89AB, vm89AB), _mm_andnot_ps(vm89AB, _mm_sub_ps(vone, vf89AB))); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x24()
194 vfCDEF = _mm_or_ps(_mm_and_ps(vfCDEF, vmCDEF), _mm_andnot_ps(vmCDEF, _mm_sub_ps(vone, vfCDEF))); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x24()
[all …]
Dsse2-p5-div-x20.c84 vn0123 = _mm_sub_ps(vn0123, vmagic_bias); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x20()
85 vn4567 = _mm_sub_ps(vn4567, vmagic_bias); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x20()
86 vn89AB = _mm_sub_ps(vn89AB, vmagic_bias); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x20()
87 vnCDEF = _mm_sub_ps(vnCDEF, vmagic_bias); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x20()
88 vnGHIJ = _mm_sub_ps(vnGHIJ, vmagic_bias); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x20()
174 vf0123 = _mm_or_ps(_mm_and_ps(vf0123, vm0123), _mm_andnot_ps(vm0123, _mm_sub_ps(vone, vf0123))); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x20()
175 vf4567 = _mm_or_ps(_mm_and_ps(vf4567, vm4567), _mm_andnot_ps(vm4567, _mm_sub_ps(vone, vf4567))); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x20()
176 vf89AB = _mm_or_ps(_mm_and_ps(vf89AB, vm89AB), _mm_andnot_ps(vm89AB, _mm_sub_ps(vone, vf89AB))); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x20()
177 vfCDEF = _mm_or_ps(_mm_and_ps(vfCDEF, vmCDEF), _mm_andnot_ps(vmCDEF, _mm_sub_ps(vone, vfCDEF))); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x20()
178 vfGHIJ = _mm_or_ps(_mm_and_ps(vfGHIJ, vmGHIJ), _mm_andnot_ps(vmGHIJ, _mm_sub_ps(vone, vfGHIJ))); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x20()
[all …]
Dsse41-p5-div-x16.c80 vn0123 = _mm_sub_ps(vn0123, vmagic_bias); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x16()
81 vn4567 = _mm_sub_ps(vn4567, vmagic_bias); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x16()
82 vn89AB = _mm_sub_ps(vn89AB, vmagic_bias); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x16()
83 vnCDEF = _mm_sub_ps(vnCDEF, vmagic_bias); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x16()
152 vf0123 = _mm_blendv_ps(_mm_sub_ps(vone, vf0123), vf0123, vx0123); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x16()
153 vf4567 = _mm_blendv_ps(_mm_sub_ps(vone, vf4567), vf4567, vx4567); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x16()
154 vf89AB = _mm_blendv_ps(_mm_sub_ps(vone, vf89AB), vf89AB, vx89AB); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x16()
155 vfCDEF = _mm_blendv_ps(_mm_sub_ps(vone, vfCDEF), vfCDEF, vxCDEF); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x16()
190 vn = _mm_sub_ps(vn, vmagic_bias); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x16()
221 vf = _mm_blendv_ps(_mm_sub_ps(vone, vf), vf, vx); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x16()
[all …]
Dsse41-p5-div-x12.c76 vn0123 = _mm_sub_ps(vn0123, vmagic_bias); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x12()
77 vn4567 = _mm_sub_ps(vn4567, vmagic_bias); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x12()
78 vn89AB = _mm_sub_ps(vn89AB, vmagic_bias); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x12()
136 vf0123 = _mm_blendv_ps(_mm_sub_ps(vone, vf0123), vf0123, vx0123); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x12()
137 vf4567 = _mm_blendv_ps(_mm_sub_ps(vone, vf4567), vf4567, vx4567); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x12()
138 vf89AB = _mm_blendv_ps(_mm_sub_ps(vone, vf89AB), vf89AB, vx89AB); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x12()
172 vn = _mm_sub_ps(vn, vmagic_bias); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x12()
203 vf = _mm_blendv_ps(_mm_sub_ps(vone, vf), vf, vx); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x12()
235 vn = _mm_sub_ps(vn, vmagic_bias); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x12()
266 vf = _mm_blendv_ps(_mm_sub_ps(vone, vf), vf, vx); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x12()
Dsse2-p5-div-x16.c80 vn0123 = _mm_sub_ps(vn0123, vmagic_bias); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x16()
81 vn4567 = _mm_sub_ps(vn4567, vmagic_bias); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x16()
82 vn89AB = _mm_sub_ps(vn89AB, vmagic_bias); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x16()
83 vnCDEF = _mm_sub_ps(vnCDEF, vmagic_bias); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x16()
157 vf0123 = _mm_or_ps(_mm_and_ps(vf0123, vm0123), _mm_andnot_ps(vm0123, _mm_sub_ps(vone, vf0123))); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x16()
158 vf4567 = _mm_or_ps(_mm_and_ps(vf4567, vm4567), _mm_andnot_ps(vm4567, _mm_sub_ps(vone, vf4567))); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x16()
159 vf89AB = _mm_or_ps(_mm_and_ps(vf89AB, vm89AB), _mm_andnot_ps(vm89AB, _mm_sub_ps(vone, vf89AB))); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x16()
160 vfCDEF = _mm_or_ps(_mm_and_ps(vfCDEF, vmCDEF), _mm_andnot_ps(vmCDEF, _mm_sub_ps(vone, vfCDEF))); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x16()
195 vn = _mm_sub_ps(vn, vmagic_bias); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x16()
227 vf = _mm_or_ps(_mm_and_ps(vf, vm), _mm_andnot_ps(vm, _mm_sub_ps(vone, vf))); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x16()
[all …]
Dsse2-p5-div-x12.c76 vn0123 = _mm_sub_ps(vn0123, vmagic_bias); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x12()
77 vn4567 = _mm_sub_ps(vn4567, vmagic_bias); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x12()
78 vn89AB = _mm_sub_ps(vn89AB, vmagic_bias); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x12()
140 vf0123 = _mm_or_ps(_mm_and_ps(vf0123, vm0123), _mm_andnot_ps(vm0123, _mm_sub_ps(vone, vf0123))); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x12()
141 vf4567 = _mm_or_ps(_mm_and_ps(vf4567, vm4567), _mm_andnot_ps(vm4567, _mm_sub_ps(vone, vf4567))); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x12()
142 vf89AB = _mm_or_ps(_mm_and_ps(vf89AB, vm89AB), _mm_andnot_ps(vm89AB, _mm_sub_ps(vone, vf89AB))); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x12()
176 vn = _mm_sub_ps(vn, vmagic_bias); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x12()
208 vf = _mm_or_ps(_mm_and_ps(vf, vm), _mm_andnot_ps(vm, _mm_sub_ps(vone, vf))); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x12()
240 vn = _mm_sub_ps(vn, vmagic_bias); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x12()
272 vf = _mm_or_ps(_mm_and_ps(vf, vm), _mm_andnot_ps(vm, _mm_sub_ps(vone, vf))); in xnn_f32_sigmoid_ukernel__sse2_p5_div_x12()
Dsse41-p5-div-x8.c72 vn0123 = _mm_sub_ps(vn0123, vmagic_bias); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x8()
73 vn4567 = _mm_sub_ps(vn4567, vmagic_bias); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x8()
120 vf0123 = _mm_blendv_ps(_mm_sub_ps(vone, vf0123), vf0123, vx0123); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x8()
121 vf4567 = _mm_blendv_ps(_mm_sub_ps(vone, vf4567), vf4567, vx4567); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x8()
154 vn = _mm_sub_ps(vn, vmagic_bias); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x8()
185 vf = _mm_blendv_ps(_mm_sub_ps(vone, vf), vf, vx); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x8()
217 vn = _mm_sub_ps(vn, vmagic_bias); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x8()
248 vf = _mm_blendv_ps(_mm_sub_ps(vone, vf), vf, vx); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x8()
/external/libaom/libaom/aom_dsp/x86/
Dfft_sse2.c75 real1 = _mm_sub_ps(real1, real2); in aom_fft_unpack_2d_output_sse2()
99 imag1 = _mm_sub_ps(imag2, imag1); in aom_fft_unpack_2d_output_sse2()
109 _mm_set1_ps, _mm_add_ps, _mm_sub_ps);
111 _mm_set1_ps, _mm_add_ps, _mm_sub_ps, _mm_mul_ps);
113 _mm_set1_ps, _mm_add_ps, _mm_sub_ps, _mm_mul_ps);
115 _mm_set1_ps, _mm_add_ps, _mm_sub_ps, _mm_mul_ps);
139 _mm_set1_ps, _mm_add_ps, _mm_sub_ps);
141 _mm_set1_ps, _mm_add_ps, _mm_sub_ps, _mm_mul_ps);
143 _mm_set1_ps, _mm_add_ps, _mm_sub_ps, _mm_mul_ps);
145 _mm_set1_ps, _mm_add_ps, _mm_sub_ps, _mm_mul_ps);
/external/XNNPACK/src/f32-bilinear/
Dsse.c.in60 const __m128 vtd${ABC[C:C+4]} = _mm_sub_ps(vtr${ABC[C:C+4]}, vtl${ABC[C:C+4]});
61 const __m128 vbd${ABC[C:C+4]} = _mm_sub_ps(vbr${ABC[C:C+4]}, vbl${ABC[C:C+4]});
68 const __m128 vd${ABC[C:C+4]} = _mm_sub_ps(vb${ABC[C:C+4]}, vt${ABC[C:C+4]});
89 const __m128 vtd0123 = _mm_sub_ps(vtr0123, vtl0123);
90 const __m128 vbd0123 = _mm_sub_ps(vbr0123, vbl0123);
95 const __m128 vd0123 = _mm_sub_ps(vb0123, vt0123);
108 const __m128 vtd0123 = _mm_sub_ps(vtr0123, vtl0123);
109 const __m128 vbd0123 = _mm_sub_ps(vbr0123, vbl0123);
114 const __m128 vd0123 = _mm_sub_ps(vb0123, vt0123);

123