/external/XNNPACK/src/f32-bilinear/gen/ |
D | sse-c8.c |
    58  const __m128 vtd0123 = _mm_sub_ps(vtr0123, vtl0123);  in xnn_f32_bilinear_ukernel__sse_c8()
    59  const __m128 vbd0123 = _mm_sub_ps(vbr0123, vbl0123);  in xnn_f32_bilinear_ukernel__sse_c8()
    60  const __m128 vtd4567 = _mm_sub_ps(vtr4567, vtl4567);  in xnn_f32_bilinear_ukernel__sse_c8()
    61  const __m128 vbd4567 = _mm_sub_ps(vbr4567, vbl4567);  in xnn_f32_bilinear_ukernel__sse_c8()
    68  const __m128 vd0123 = _mm_sub_ps(vb0123, vt0123);  in xnn_f32_bilinear_ukernel__sse_c8()
    69  const __m128 vd4567 = _mm_sub_ps(vb4567, vt4567);  in xnn_f32_bilinear_ukernel__sse_c8()
    88  const __m128 vtd0123 = _mm_sub_ps(vtr0123, vtl0123);  in xnn_f32_bilinear_ukernel__sse_c8()
    89  const __m128 vbd0123 = _mm_sub_ps(vbr0123, vbl0123);  in xnn_f32_bilinear_ukernel__sse_c8()
    94  const __m128 vd0123 = _mm_sub_ps(vb0123, vt0123);  in xnn_f32_bilinear_ukernel__sse_c8()
   107  const __m128 vtd0123 = _mm_sub_ps(vtr0123, vtl0123);  in xnn_f32_bilinear_ukernel__sse_c8()
   [all …]
|
D | sse-c4.c |
    54  const __m128 vtd0123 = _mm_sub_ps(vtr0123, vtl0123);  in xnn_f32_bilinear_ukernel__sse_c4()
    55  const __m128 vbd0123 = _mm_sub_ps(vbr0123, vbl0123);  in xnn_f32_bilinear_ukernel__sse_c4()
    60  const __m128 vd0123 = _mm_sub_ps(vb0123, vt0123);  in xnn_f32_bilinear_ukernel__sse_c4()
    73  const __m128 vtd0123 = _mm_sub_ps(vtr0123, vtl0123);  in xnn_f32_bilinear_ukernel__sse_c4()
    74  const __m128 vbd0123 = _mm_sub_ps(vbr0123, vbl0123);  in xnn_f32_bilinear_ukernel__sse_c4()
    79  const __m128 vd0123 = _mm_sub_ps(vb0123, vt0123);  in xnn_f32_bilinear_ukernel__sse_c4()
|
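Taken together, these matches are the heart of XNNPACK's SSE bilinear interpolation: vtd/vbd are the horizontal deltas along the top and bottom edges, and vd is the vertical delta between the two horizontal lerps. A minimal sketch of that two-step lerp for one 4-lane group (an illustrative helper, not the actual ukernel, which also handles pointer setup and the channel loop):

#include <xmmintrin.h>

/* Lerp across the top and bottom edges, then lerp vertically between the
 * two results. Plain SSE has no FMA, hence the separate mul and add. */
static inline __m128 bilinear4(__m128 vtl, __m128 vtr, __m128 vbl, __m128 vbr,
                               __m128 valphah, __m128 valphav) {
  const __m128 vtd = _mm_sub_ps(vtr, vtl);                      /* top delta */
  const __m128 vbd = _mm_sub_ps(vbr, vbl);                      /* bottom delta */
  const __m128 vt = _mm_add_ps(vtl, _mm_mul_ps(vtd, valphah));  /* top lerp */
  const __m128 vb = _mm_add_ps(vbl, _mm_mul_ps(vbd, valphah));  /* bottom lerp */
  const __m128 vd = _mm_sub_ps(vb, vt);                         /* vertical delta */
  return _mm_add_ps(vt, _mm_mul_ps(vd, valphav));               /* final lerp */
}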
/external/webrtc/webrtc/modules/audio_processing/aec/ |
D | aec_rdft_sse2.c |
    39  const __m128 x1v = _mm_sub_ps(a01v, a23v);  in cft1st_128_SSE2()
    41  const __m128 x3v = _mm_sub_ps(a45v, a67v);  in cft1st_128_SSE2()
    44  x0v = _mm_sub_ps(x0v, x2v);  in cft1st_128_SSE2()
    61  x0v = _mm_sub_ps(x1v, x3s);  in cft1st_128_SSE2()
    99  const __m128 x1r0_1i0_1r1_x1i1 = _mm_sub_ps(a_00_32, a_08_40);  in cftmdl_128_SSE2()
   112  const __m128 x3r0_3i0_3r1_x3i1 = _mm_sub_ps(a_16_48, a_24_56);  in cftmdl_128_SSE2()
   115  const __m128 xx1 = _mm_sub_ps(x0r0_0i0_0r1_x0i1, x2r0_2i0_2r1_x2i1);  in cftmdl_128_SSE2()
   121  const __m128 x1_x3_sub = _mm_sub_ps(x1r0_1i0_1r1_x1i1, x3_swapped);  in cftmdl_128_SSE2()
   173  const __m128 x1r0_1i0_1r1_x1i1 = _mm_sub_ps(a_00_32, a_08_40);  in cftmdl_128_SSE2()
   186  const __m128 x3r0_3i0_3r1_x3i1 = _mm_sub_ps(a_16_48, a_24_56);  in cftmdl_128_SSE2()
   [all …]
|
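The WebRTC AEC matches are FFT butterflies: pairwise sums and differences of packed complex data, with _mm_sub_ps producing the difference terms. An illustrative radix-4 first stage in the same naming style (the twiddle-factor rotation of the odd outputs is omitted here; the real cft1st_128_SSE2() applies it afterwards):

#include <xmmintrin.h>

static inline void radix4_stage(__m128 a01, __m128 a23, __m128 a45, __m128 a67,
                                __m128 out[4]) {
  const __m128 x0 = _mm_add_ps(a01, a23);  /* pairwise sums */
  const __m128 x1 = _mm_sub_ps(a01, a23);  /* pairwise differences */
  const __m128 x2 = _mm_add_ps(a45, a67);
  const __m128 x3 = _mm_sub_ps(a45, a67);
  out[0] = _mm_add_ps(x0, x2);             /* combine even terms */
  out[2] = _mm_sub_ps(x0, x2);
  out[1] = x1;                             /* odd terms; caller applies twiddles */
  out[3] = x3;
}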
/external/XNNPACK/src/f32-raddstoreexpminusmax/gen/ |
D | sse2-p5-x20.c |
    54  const __m128 vx0123 = _mm_sub_ps(vi0123, vi_max);  in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20()
    55  const __m128 vx4567 = _mm_sub_ps(vi4567, vi_max);  in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20()
    56  const __m128 vx89AB = _mm_sub_ps(vi89AB, vi_max);  in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20()
    57  const __m128 vxCDEF = _mm_sub_ps(viCDEF, vi_max);  in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20()
    58  const __m128 vxGHIJ = _mm_sub_ps(viGHIJ, vi_max);  in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20()
    76  vn0123 = _mm_sub_ps(vn0123, vmagic_bias);  in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20()
    77  vn4567 = _mm_sub_ps(vn4567, vmagic_bias);  in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20()
    78  vn89AB = _mm_sub_ps(vn89AB, vmagic_bias);  in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20()
    79  vnCDEF = _mm_sub_ps(vnCDEF, vmagic_bias);  in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20()
    80  vnGHIJ = _mm_sub_ps(vnGHIJ, vmagic_bias);  in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20()
   [all …]
|
D | sse2-p5-x20-acc2.c |
    55  const __m128 vx0123 = _mm_sub_ps(vi0123, vi_max);  in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20_acc2()
    56  const __m128 vx4567 = _mm_sub_ps(vi4567, vi_max);  in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20_acc2()
    57  const __m128 vx89AB = _mm_sub_ps(vi89AB, vi_max);  in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20_acc2()
    58  const __m128 vxCDEF = _mm_sub_ps(viCDEF, vi_max);  in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20_acc2()
    59  const __m128 vxGHIJ = _mm_sub_ps(viGHIJ, vi_max);  in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20_acc2()
    77  vn0123 = _mm_sub_ps(vn0123, vmagic_bias);  in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20_acc2()
    78  vn4567 = _mm_sub_ps(vn4567, vmagic_bias);  in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20_acc2()
    79  vn89AB = _mm_sub_ps(vn89AB, vmagic_bias);  in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20_acc2()
    80  vnCDEF = _mm_sub_ps(vnCDEF, vmagic_bias);  in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20_acc2()
    81  vnGHIJ = _mm_sub_ps(vnGHIJ, vmagic_bias);  in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20_acc2()
   [all …]
|
D | sse2-p5-x20-acc5.c |
    58  const __m128 vx0123 = _mm_sub_ps(vi0123, vi_max);  in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20_acc5()
    59  const __m128 vx4567 = _mm_sub_ps(vi4567, vi_max);  in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20_acc5()
    60  const __m128 vx89AB = _mm_sub_ps(vi89AB, vi_max);  in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20_acc5()
    61  const __m128 vxCDEF = _mm_sub_ps(viCDEF, vi_max);  in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20_acc5()
    62  const __m128 vxGHIJ = _mm_sub_ps(viGHIJ, vi_max);  in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20_acc5()
    80  vn0123 = _mm_sub_ps(vn0123, vmagic_bias);  in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20_acc5()
    81  vn4567 = _mm_sub_ps(vn4567, vmagic_bias);  in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20_acc5()
    82  vn89AB = _mm_sub_ps(vn89AB, vmagic_bias);  in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20_acc5()
    83  vnCDEF = _mm_sub_ps(vnCDEF, vmagic_bias);  in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20_acc5()
    84  vnGHIJ = _mm_sub_ps(vnGHIJ, vmagic_bias);  in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x20_acc5()
   [all …]
|
D | sse2-p5-x16.c |
    53  const __m128 vx0123 = _mm_sub_ps(vi0123, vi_max);  in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16()
    54  const __m128 vx4567 = _mm_sub_ps(vi4567, vi_max);  in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16()
    55  const __m128 vx89AB = _mm_sub_ps(vi89AB, vi_max);  in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16()
    56  const __m128 vxCDEF = _mm_sub_ps(viCDEF, vi_max);  in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16()
    72  vn0123 = _mm_sub_ps(vn0123, vmagic_bias);  in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16()
    73  vn4567 = _mm_sub_ps(vn4567, vmagic_bias);  in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16()
    74  vn89AB = _mm_sub_ps(vn89AB, vmagic_bias);  in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16()
    75  vnCDEF = _mm_sub_ps(vnCDEF, vmagic_bias);  in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16()
   152  const __m128 vx = _mm_sub_ps(vi, vi_max);  in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16()
   162  vn = _mm_sub_ps(vn, vmagic_bias);  in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16()
   [all …]
|
D | sse2-p5-x16-acc4.c |
    56  const __m128 vx0123 = _mm_sub_ps(vi0123, vi_max);  in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16_acc4()
    57  const __m128 vx4567 = _mm_sub_ps(vi4567, vi_max);  in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16_acc4()
    58  const __m128 vx89AB = _mm_sub_ps(vi89AB, vi_max);  in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16_acc4()
    59  const __m128 vxCDEF = _mm_sub_ps(viCDEF, vi_max);  in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16_acc4()
    75  vn0123 = _mm_sub_ps(vn0123, vmagic_bias);  in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16_acc4()
    76  vn4567 = _mm_sub_ps(vn4567, vmagic_bias);  in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16_acc4()
    77  vn89AB = _mm_sub_ps(vn89AB, vmagic_bias);  in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16_acc4()
    78  vnCDEF = _mm_sub_ps(vnCDEF, vmagic_bias);  in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16_acc4()
   159  const __m128 vx = _mm_sub_ps(vi, vi_max);  in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16_acc4()
   169  vn = _mm_sub_ps(vn, vmagic_bias);  in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16_acc4()
   [all …]
|
D | sse2-p5-x16-acc2.c |
    54  const __m128 vx0123 = _mm_sub_ps(vi0123, vi_max);  in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16_acc2()
    55  const __m128 vx4567 = _mm_sub_ps(vi4567, vi_max);  in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16_acc2()
    56  const __m128 vx89AB = _mm_sub_ps(vi89AB, vi_max);  in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16_acc2()
    57  const __m128 vxCDEF = _mm_sub_ps(viCDEF, vi_max);  in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16_acc2()
    73  vn0123 = _mm_sub_ps(vn0123, vmagic_bias);  in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16_acc2()
    74  vn4567 = _mm_sub_ps(vn4567, vmagic_bias);  in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16_acc2()
    75  vn89AB = _mm_sub_ps(vn89AB, vmagic_bias);  in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16_acc2()
    76  vnCDEF = _mm_sub_ps(vnCDEF, vmagic_bias);  in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16_acc2()
   155  const __m128 vx = _mm_sub_ps(vi, vi_max);  in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16_acc2()
   165  vn = _mm_sub_ps(vn, vmagic_bias);  in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x16_acc2()
   [all …]
|
D | sse2-p5-x12.c |
    52  const __m128 vx0123 = _mm_sub_ps(vi0123, vi_max);  in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12()
    53  const __m128 vx4567 = _mm_sub_ps(vi4567, vi_max);  in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12()
    54  const __m128 vx89AB = _mm_sub_ps(vi89AB, vi_max);  in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12()
    68  vn0123 = _mm_sub_ps(vn0123, vmagic_bias);  in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12()
    69  vn4567 = _mm_sub_ps(vn4567, vmagic_bias);  in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12()
    70  vn89AB = _mm_sub_ps(vn89AB, vmagic_bias);  in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12()
   136  const __m128 vx = _mm_sub_ps(vi, vi_max);  in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12()
   146  vn = _mm_sub_ps(vn, vmagic_bias);  in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12()
   184  const __m128 vx = _mm_sub_ps(vi, vi_max);  in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12()
   194  vn = _mm_sub_ps(vn, vmagic_bias);  in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12()
|
D | sse2-p5-x12-acc3.c |
    54  const __m128 vx0123 = _mm_sub_ps(vi0123, vi_max);  in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12_acc3()
    55  const __m128 vx4567 = _mm_sub_ps(vi4567, vi_max);  in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12_acc3()
    56  const __m128 vx89AB = _mm_sub_ps(vi89AB, vi_max);  in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12_acc3()
    70  vn0123 = _mm_sub_ps(vn0123, vmagic_bias);  in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12_acc3()
    71  vn4567 = _mm_sub_ps(vn4567, vmagic_bias);  in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12_acc3()
    72  vn89AB = _mm_sub_ps(vn89AB, vmagic_bias);  in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12_acc3()
   141  const __m128 vx = _mm_sub_ps(vi, vi_max);  in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12_acc3()
   151  vn = _mm_sub_ps(vn, vmagic_bias);  in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12_acc3()
   189  const __m128 vx = _mm_sub_ps(vi, vi_max);  in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12_acc3()
   199  vn = _mm_sub_ps(vn, vmagic_bias);  in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12_acc3()
|
D | sse2-p5-x12-acc2.c |
    53  const __m128 vx0123 = _mm_sub_ps(vi0123, vi_max);  in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12_acc2()
    54  const __m128 vx4567 = _mm_sub_ps(vi4567, vi_max);  in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12_acc2()
    55  const __m128 vx89AB = _mm_sub_ps(vi89AB, vi_max);  in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12_acc2()
    69  vn0123 = _mm_sub_ps(vn0123, vmagic_bias);  in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12_acc2()
    70  vn4567 = _mm_sub_ps(vn4567, vmagic_bias);  in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12_acc2()
    71  vn89AB = _mm_sub_ps(vn89AB, vmagic_bias);  in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12_acc2()
   139  const __m128 vx = _mm_sub_ps(vi, vi_max);  in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12_acc2()
   149  vn = _mm_sub_ps(vn, vmagic_bias);  in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12_acc2()
   187  const __m128 vx = _mm_sub_ps(vi, vi_max);  in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12_acc2()
   197  vn = _mm_sub_ps(vn, vmagic_bias);  in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x12_acc2()
|
D | sse2-p5-x8.c |
    51  const __m128 vx0123 = _mm_sub_ps(vi0123, vi_max);  in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8()
    52  const __m128 vx4567 = _mm_sub_ps(vi4567, vi_max);  in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8()
    64  vn0123 = _mm_sub_ps(vn0123, vmagic_bias);  in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8()
    65  vn4567 = _mm_sub_ps(vn4567, vmagic_bias);  in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8()
   120  const __m128 vx = _mm_sub_ps(vi, vi_max);  in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8()
   130  vn = _mm_sub_ps(vn, vmagic_bias);  in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8()
   168  const __m128 vx = _mm_sub_ps(vi, vi_max);  in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8()
   178  vn = _mm_sub_ps(vn, vmagic_bias);  in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8()
|
D | sse2-p5-x8-acc2.c |
    52  const __m128 vx0123 = _mm_sub_ps(vi0123, vi_max);  in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8_acc2()
    53  const __m128 vx4567 = _mm_sub_ps(vi4567, vi_max);  in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8_acc2()
    65  vn0123 = _mm_sub_ps(vn0123, vmagic_bias);  in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8_acc2()
    66  vn4567 = _mm_sub_ps(vn4567, vmagic_bias);  in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8_acc2()
   123  const __m128 vx = _mm_sub_ps(vi, vi_max);  in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8_acc2()
   133  vn = _mm_sub_ps(vn, vmagic_bias);  in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8_acc2()
   171  const __m128 vx = _mm_sub_ps(vi, vi_max);  in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8_acc2()
   181  vn = _mm_sub_ps(vn, vmagic_bias);  in xnn_f32_raddstoreexpminusmax_ukernel__sse2_p5_x8_acc2()
|
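Every raddstoreexpminusmax kernel above shows the same two subtractions: vx = vi - vi_max keeps the exponent argument non-positive so exp() cannot overflow, and vn - vmagic_bias undoes the magic-bias rounding trick. A sketch of that trick using the textbook bias constant (XNNPACK's exact constant may be a slightly tweaked value):

#include <xmmintrin.h>

/* Adding 1.5 * 2^23 pushes the fractional bits of a small float out of the
 * mantissa, so the add itself rounds x * log2(e) to the nearest integer;
 * subtracting the bias back recovers n as a float, with no cvt/round op. */
static inline __m128 round_log2e(__m128 vx) {
  const __m128 vmagic_bias = _mm_set1_ps(12582912.0f);  /* 0x1.8p23 */
  const __m128 vlog2e = _mm_set1_ps(1.442695f);         /* log2(e), truncated */
  __m128 vn = _mm_add_ps(_mm_mul_ps(vx, vlog2e), vmagic_bias);
  /* while biased, the low mantissa bits of vn hold the integer n; the real
   * kernels reuse them to reconstruct 2^n before subtracting the bias */
  vn = _mm_sub_ps(vn, vmagic_bias);
  return vn;
}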
/external/XNNPACK/src/f32-sigmoid/gen/ |
D | sse41-p5-div-x24.c |
    88  vn0123 = _mm_sub_ps(vn0123, vmagic_bias);  in xnn_f32_sigmoid_ukernel__sse41_p5_div_x24()
    89  vn4567 = _mm_sub_ps(vn4567, vmagic_bias);  in xnn_f32_sigmoid_ukernel__sse41_p5_div_x24()
    90  vn89AB = _mm_sub_ps(vn89AB, vmagic_bias);  in xnn_f32_sigmoid_ukernel__sse41_p5_div_x24()
    91  vnCDEF = _mm_sub_ps(vnCDEF, vmagic_bias);  in xnn_f32_sigmoid_ukernel__sse41_p5_div_x24()
    92  vnGHIJ = _mm_sub_ps(vnGHIJ, vmagic_bias);  in xnn_f32_sigmoid_ukernel__sse41_p5_div_x24()
    93  vnKLMN = _mm_sub_ps(vnKLMN, vmagic_bias);  in xnn_f32_sigmoid_ukernel__sse41_p5_div_x24()
   184  vf0123 = _mm_blendv_ps(_mm_sub_ps(vone, vf0123), vf0123, vx0123);  in xnn_f32_sigmoid_ukernel__sse41_p5_div_x24()
   185  vf4567 = _mm_blendv_ps(_mm_sub_ps(vone, vf4567), vf4567, vx4567);  in xnn_f32_sigmoid_ukernel__sse41_p5_div_x24()
   186  vf89AB = _mm_blendv_ps(_mm_sub_ps(vone, vf89AB), vf89AB, vx89AB);  in xnn_f32_sigmoid_ukernel__sse41_p5_div_x24()
   187  vfCDEF = _mm_blendv_ps(_mm_sub_ps(vone, vfCDEF), vfCDEF, vxCDEF);  in xnn_f32_sigmoid_ukernel__sse41_p5_div_x24()
   [all …]
|
D | sse41-p5-div-x20.c |
    84  vn0123 = _mm_sub_ps(vn0123, vmagic_bias);  in xnn_f32_sigmoid_ukernel__sse41_p5_div_x20()
    85  vn4567 = _mm_sub_ps(vn4567, vmagic_bias);  in xnn_f32_sigmoid_ukernel__sse41_p5_div_x20()
    86  vn89AB = _mm_sub_ps(vn89AB, vmagic_bias);  in xnn_f32_sigmoid_ukernel__sse41_p5_div_x20()
    87  vnCDEF = _mm_sub_ps(vnCDEF, vmagic_bias);  in xnn_f32_sigmoid_ukernel__sse41_p5_div_x20()
    88  vnGHIJ = _mm_sub_ps(vnGHIJ, vmagic_bias);  in xnn_f32_sigmoid_ukernel__sse41_p5_div_x20()
   168  vf0123 = _mm_blendv_ps(_mm_sub_ps(vone, vf0123), vf0123, vx0123);  in xnn_f32_sigmoid_ukernel__sse41_p5_div_x20()
   169  vf4567 = _mm_blendv_ps(_mm_sub_ps(vone, vf4567), vf4567, vx4567);  in xnn_f32_sigmoid_ukernel__sse41_p5_div_x20()
   170  vf89AB = _mm_blendv_ps(_mm_sub_ps(vone, vf89AB), vf89AB, vx89AB);  in xnn_f32_sigmoid_ukernel__sse41_p5_div_x20()
   171  vfCDEF = _mm_blendv_ps(_mm_sub_ps(vone, vfCDEF), vfCDEF, vxCDEF);  in xnn_f32_sigmoid_ukernel__sse41_p5_div_x20()
   172  vfGHIJ = _mm_blendv_ps(_mm_sub_ps(vone, vfGHIJ), vfGHIJ, vxGHIJ);  in xnn_f32_sigmoid_ukernel__sse41_p5_div_x20()
   [all …]
|
D | sse2-p5-div-x24.c |
    88  vn0123 = _mm_sub_ps(vn0123, vmagic_bias);  in xnn_f32_sigmoid_ukernel__sse2_p5_div_x24()
    89  vn4567 = _mm_sub_ps(vn4567, vmagic_bias);  in xnn_f32_sigmoid_ukernel__sse2_p5_div_x24()
    90  vn89AB = _mm_sub_ps(vn89AB, vmagic_bias);  in xnn_f32_sigmoid_ukernel__sse2_p5_div_x24()
    91  vnCDEF = _mm_sub_ps(vnCDEF, vmagic_bias);  in xnn_f32_sigmoid_ukernel__sse2_p5_div_x24()
    92  vnGHIJ = _mm_sub_ps(vnGHIJ, vmagic_bias);  in xnn_f32_sigmoid_ukernel__sse2_p5_div_x24()
    93  vnKLMN = _mm_sub_ps(vnKLMN, vmagic_bias);  in xnn_f32_sigmoid_ukernel__sse2_p5_div_x24()
   191  vf0123 = _mm_or_ps(_mm_and_ps(vf0123, vm0123), _mm_andnot_ps(vm0123, _mm_sub_ps(vone, vf0123)));  in xnn_f32_sigmoid_ukernel__sse2_p5_div_x24()
   192  vf4567 = _mm_or_ps(_mm_and_ps(vf4567, vm4567), _mm_andnot_ps(vm4567, _mm_sub_ps(vone, vf4567)));  in xnn_f32_sigmoid_ukernel__sse2_p5_div_x24()
   193  vf89AB = _mm_or_ps(_mm_and_ps(vf89AB, vm89AB), _mm_andnot_ps(vm89AB, _mm_sub_ps(vone, vf89AB)));  in xnn_f32_sigmoid_ukernel__sse2_p5_div_x24()
   194  vfCDEF = _mm_or_ps(_mm_and_ps(vfCDEF, vmCDEF), _mm_andnot_ps(vmCDEF, _mm_sub_ps(vone, vfCDEF)));  in xnn_f32_sigmoid_ukernel__sse2_p5_div_x24()
   [all …]
|
D | sse2-p5-div-x20.c |
    84  vn0123 = _mm_sub_ps(vn0123, vmagic_bias);  in xnn_f32_sigmoid_ukernel__sse2_p5_div_x20()
    85  vn4567 = _mm_sub_ps(vn4567, vmagic_bias);  in xnn_f32_sigmoid_ukernel__sse2_p5_div_x20()
    86  vn89AB = _mm_sub_ps(vn89AB, vmagic_bias);  in xnn_f32_sigmoid_ukernel__sse2_p5_div_x20()
    87  vnCDEF = _mm_sub_ps(vnCDEF, vmagic_bias);  in xnn_f32_sigmoid_ukernel__sse2_p5_div_x20()
    88  vnGHIJ = _mm_sub_ps(vnGHIJ, vmagic_bias);  in xnn_f32_sigmoid_ukernel__sse2_p5_div_x20()
   174  vf0123 = _mm_or_ps(_mm_and_ps(vf0123, vm0123), _mm_andnot_ps(vm0123, _mm_sub_ps(vone, vf0123)));  in xnn_f32_sigmoid_ukernel__sse2_p5_div_x20()
   175  vf4567 = _mm_or_ps(_mm_and_ps(vf4567, vm4567), _mm_andnot_ps(vm4567, _mm_sub_ps(vone, vf4567)));  in xnn_f32_sigmoid_ukernel__sse2_p5_div_x20()
   176  vf89AB = _mm_or_ps(_mm_and_ps(vf89AB, vm89AB), _mm_andnot_ps(vm89AB, _mm_sub_ps(vone, vf89AB)));  in xnn_f32_sigmoid_ukernel__sse2_p5_div_x20()
   177  vfCDEF = _mm_or_ps(_mm_and_ps(vfCDEF, vmCDEF), _mm_andnot_ps(vmCDEF, _mm_sub_ps(vone, vfCDEF)));  in xnn_f32_sigmoid_ukernel__sse2_p5_div_x20()
   178  vfGHIJ = _mm_or_ps(_mm_and_ps(vfGHIJ, vmGHIJ), _mm_andnot_ps(vmGHIJ, _mm_sub_ps(vone, vfGHIJ)));  in xnn_f32_sigmoid_ukernel__sse2_p5_div_x20()
   [all …]
|
D | sse41-p5-div-x16.c |
    80  vn0123 = _mm_sub_ps(vn0123, vmagic_bias);  in xnn_f32_sigmoid_ukernel__sse41_p5_div_x16()
    81  vn4567 = _mm_sub_ps(vn4567, vmagic_bias);  in xnn_f32_sigmoid_ukernel__sse41_p5_div_x16()
    82  vn89AB = _mm_sub_ps(vn89AB, vmagic_bias);  in xnn_f32_sigmoid_ukernel__sse41_p5_div_x16()
    83  vnCDEF = _mm_sub_ps(vnCDEF, vmagic_bias);  in xnn_f32_sigmoid_ukernel__sse41_p5_div_x16()
   152  vf0123 = _mm_blendv_ps(_mm_sub_ps(vone, vf0123), vf0123, vx0123);  in xnn_f32_sigmoid_ukernel__sse41_p5_div_x16()
   153  vf4567 = _mm_blendv_ps(_mm_sub_ps(vone, vf4567), vf4567, vx4567);  in xnn_f32_sigmoid_ukernel__sse41_p5_div_x16()
   154  vf89AB = _mm_blendv_ps(_mm_sub_ps(vone, vf89AB), vf89AB, vx89AB);  in xnn_f32_sigmoid_ukernel__sse41_p5_div_x16()
   155  vfCDEF = _mm_blendv_ps(_mm_sub_ps(vone, vfCDEF), vfCDEF, vxCDEF);  in xnn_f32_sigmoid_ukernel__sse41_p5_div_x16()
   190  vn = _mm_sub_ps(vn, vmagic_bias);  in xnn_f32_sigmoid_ukernel__sse41_p5_div_x16()
   221  vf = _mm_blendv_ps(_mm_sub_ps(vone, vf), vf, vx);  in xnn_f32_sigmoid_ukernel__sse41_p5_div_x16()
   [all …]
|
D | sse41-p5-div-x12.c |
    76  vn0123 = _mm_sub_ps(vn0123, vmagic_bias);  in xnn_f32_sigmoid_ukernel__sse41_p5_div_x12()
    77  vn4567 = _mm_sub_ps(vn4567, vmagic_bias);  in xnn_f32_sigmoid_ukernel__sse41_p5_div_x12()
    78  vn89AB = _mm_sub_ps(vn89AB, vmagic_bias);  in xnn_f32_sigmoid_ukernel__sse41_p5_div_x12()
   136  vf0123 = _mm_blendv_ps(_mm_sub_ps(vone, vf0123), vf0123, vx0123);  in xnn_f32_sigmoid_ukernel__sse41_p5_div_x12()
   137  vf4567 = _mm_blendv_ps(_mm_sub_ps(vone, vf4567), vf4567, vx4567);  in xnn_f32_sigmoid_ukernel__sse41_p5_div_x12()
   138  vf89AB = _mm_blendv_ps(_mm_sub_ps(vone, vf89AB), vf89AB, vx89AB);  in xnn_f32_sigmoid_ukernel__sse41_p5_div_x12()
   172  vn = _mm_sub_ps(vn, vmagic_bias);  in xnn_f32_sigmoid_ukernel__sse41_p5_div_x12()
   203  vf = _mm_blendv_ps(_mm_sub_ps(vone, vf), vf, vx);  in xnn_f32_sigmoid_ukernel__sse41_p5_div_x12()
   235  vn = _mm_sub_ps(vn, vmagic_bias);  in xnn_f32_sigmoid_ukernel__sse41_p5_div_x12()
   266  vf = _mm_blendv_ps(_mm_sub_ps(vone, vf), vf, vx);  in xnn_f32_sigmoid_ukernel__sse41_p5_div_x12()
|
D | sse2-p5-div-x16.c |
    80  vn0123 = _mm_sub_ps(vn0123, vmagic_bias);  in xnn_f32_sigmoid_ukernel__sse2_p5_div_x16()
    81  vn4567 = _mm_sub_ps(vn4567, vmagic_bias);  in xnn_f32_sigmoid_ukernel__sse2_p5_div_x16()
    82  vn89AB = _mm_sub_ps(vn89AB, vmagic_bias);  in xnn_f32_sigmoid_ukernel__sse2_p5_div_x16()
    83  vnCDEF = _mm_sub_ps(vnCDEF, vmagic_bias);  in xnn_f32_sigmoid_ukernel__sse2_p5_div_x16()
   157  vf0123 = _mm_or_ps(_mm_and_ps(vf0123, vm0123), _mm_andnot_ps(vm0123, _mm_sub_ps(vone, vf0123)));  in xnn_f32_sigmoid_ukernel__sse2_p5_div_x16()
   158  vf4567 = _mm_or_ps(_mm_and_ps(vf4567, vm4567), _mm_andnot_ps(vm4567, _mm_sub_ps(vone, vf4567)));  in xnn_f32_sigmoid_ukernel__sse2_p5_div_x16()
   159  vf89AB = _mm_or_ps(_mm_and_ps(vf89AB, vm89AB), _mm_andnot_ps(vm89AB, _mm_sub_ps(vone, vf89AB)));  in xnn_f32_sigmoid_ukernel__sse2_p5_div_x16()
   160  vfCDEF = _mm_or_ps(_mm_and_ps(vfCDEF, vmCDEF), _mm_andnot_ps(vmCDEF, _mm_sub_ps(vone, vfCDEF)));  in xnn_f32_sigmoid_ukernel__sse2_p5_div_x16()
   195  vn = _mm_sub_ps(vn, vmagic_bias);  in xnn_f32_sigmoid_ukernel__sse2_p5_div_x16()
   227  vf = _mm_or_ps(_mm_and_ps(vf, vm), _mm_andnot_ps(vm, _mm_sub_ps(vone, vf)));  in xnn_f32_sigmoid_ukernel__sse2_p5_div_x16()
   [all …]
|
D | sse2-p5-div-x12.c |
    76  vn0123 = _mm_sub_ps(vn0123, vmagic_bias);  in xnn_f32_sigmoid_ukernel__sse2_p5_div_x12()
    77  vn4567 = _mm_sub_ps(vn4567, vmagic_bias);  in xnn_f32_sigmoid_ukernel__sse2_p5_div_x12()
    78  vn89AB = _mm_sub_ps(vn89AB, vmagic_bias);  in xnn_f32_sigmoid_ukernel__sse2_p5_div_x12()
   140  vf0123 = _mm_or_ps(_mm_and_ps(vf0123, vm0123), _mm_andnot_ps(vm0123, _mm_sub_ps(vone, vf0123)));  in xnn_f32_sigmoid_ukernel__sse2_p5_div_x12()
   141  vf4567 = _mm_or_ps(_mm_and_ps(vf4567, vm4567), _mm_andnot_ps(vm4567, _mm_sub_ps(vone, vf4567)));  in xnn_f32_sigmoid_ukernel__sse2_p5_div_x12()
   142  vf89AB = _mm_or_ps(_mm_and_ps(vf89AB, vm89AB), _mm_andnot_ps(vm89AB, _mm_sub_ps(vone, vf89AB)));  in xnn_f32_sigmoid_ukernel__sse2_p5_div_x12()
   176  vn = _mm_sub_ps(vn, vmagic_bias);  in xnn_f32_sigmoid_ukernel__sse2_p5_div_x12()
   208  vf = _mm_or_ps(_mm_and_ps(vf, vm), _mm_andnot_ps(vm, _mm_sub_ps(vone, vf)));  in xnn_f32_sigmoid_ukernel__sse2_p5_div_x12()
   240  vn = _mm_sub_ps(vn, vmagic_bias);  in xnn_f32_sigmoid_ukernel__sse2_p5_div_x12()
   272  vf = _mm_or_ps(_mm_and_ps(vf, vm), _mm_andnot_ps(vm, _mm_sub_ps(vone, vf)));  in xnn_f32_sigmoid_ukernel__sse2_p5_div_x12()
|
D | sse41-p5-div-x8.c |
    72  vn0123 = _mm_sub_ps(vn0123, vmagic_bias);  in xnn_f32_sigmoid_ukernel__sse41_p5_div_x8()
    73  vn4567 = _mm_sub_ps(vn4567, vmagic_bias);  in xnn_f32_sigmoid_ukernel__sse41_p5_div_x8()
   120  vf0123 = _mm_blendv_ps(_mm_sub_ps(vone, vf0123), vf0123, vx0123);  in xnn_f32_sigmoid_ukernel__sse41_p5_div_x8()
   121  vf4567 = _mm_blendv_ps(_mm_sub_ps(vone, vf4567), vf4567, vx4567);  in xnn_f32_sigmoid_ukernel__sse41_p5_div_x8()
   154  vn = _mm_sub_ps(vn, vmagic_bias);  in xnn_f32_sigmoid_ukernel__sse41_p5_div_x8()
   185  vf = _mm_blendv_ps(_mm_sub_ps(vone, vf), vf, vx);  in xnn_f32_sigmoid_ukernel__sse41_p5_div_x8()
   217  vn = _mm_sub_ps(vn, vmagic_bias);  in xnn_f32_sigmoid_ukernel__sse41_p5_div_x8()
   248  vf = _mm_blendv_ps(_mm_sub_ps(vone, vf), vf, vx);  in xnn_f32_sigmoid_ukernel__sse41_p5_div_x8()
|
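Two idioms repeat across these sigmoid kernels. The vmagic_bias subtraction is the same range-reduction step as in the expminusmax kernels above, and the final _mm_sub_ps(vone, vf) applies the identity sigmoid(x) = 1 - sigmoid(-x): the polynomial is evaluated only on the negative half-line, and lanes with non-negative x are reconstructed from it. SSE4.1 selects per lane with _mm_blendv_ps on the sign bit of vx; SSE2 spells the same select with and/andnot/or. A sketch of both fix-ups (helper names are illustrative; the SSE2 mask construction is the usual sign-bit compare, which the matches above use as vm but do not show being built):

#include <smmintrin.h>  /* SSE4.1; transitively includes the SSE2 headers */

static inline __m128 sigmoid_signfix_sse41(__m128 vf, __m128 vx) {
  const __m128 vone = _mm_set1_ps(1.0f);
  /* lanes where vx has its sign bit set keep vf; others take 1 - vf */
  return _mm_blendv_ps(_mm_sub_ps(vone, vf), vf, vx);
}

static inline __m128 sigmoid_signfix_sse2(__m128 vf, __m128 vx) {
  const __m128 vone = _mm_set1_ps(1.0f);
  /* all-ones mask where vx < 0: a signed integer compare on the raw bits */
  const __m128 vm = _mm_castsi128_ps(
      _mm_cmpgt_epi32(_mm_setzero_si128(), _mm_castps_si128(vx)));
  return _mm_or_ps(_mm_and_ps(vf, vm),
                   _mm_andnot_ps(vm, _mm_sub_ps(vone, vf)));
}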
/external/libaom/libaom/aom_dsp/x86/ |
D | fft_sse2.c |
    75  real1 = _mm_sub_ps(real1, real2);  in aom_fft_unpack_2d_output_sse2()
    99  imag1 = _mm_sub_ps(imag2, imag1);  in aom_fft_unpack_2d_output_sse2()
   109  _mm_set1_ps, _mm_add_ps, _mm_sub_ps);
   111  _mm_set1_ps, _mm_add_ps, _mm_sub_ps, _mm_mul_ps);
   113  _mm_set1_ps, _mm_add_ps, _mm_sub_ps, _mm_mul_ps);
   115  _mm_set1_ps, _mm_add_ps, _mm_sub_ps, _mm_mul_ps);
   139  _mm_set1_ps, _mm_add_ps, _mm_sub_ps);
   141  _mm_set1_ps, _mm_add_ps, _mm_sub_ps, _mm_mul_ps);
   143  _mm_set1_ps, _mm_add_ps, _mm_sub_ps, _mm_mul_ps);
   145  _mm_set1_ps, _mm_add_ps, _mm_sub_ps, _mm_mul_ps);
|
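In fft_sse2.c most of these matches are not calls at all: libaom generates its SIMD FFT kernels from generic macros that take the vector primitives (_mm_set1_ps, _mm_add_ps, _mm_sub_ps, _mm_mul_ps) as arguments, so one butterfly body can be stamped out per SIMD width. A hypothetical illustration of that parameterization (GEN_BUTTERFLY is a stand-in name, not the actual libaom macro):

#include <xmmintrin.h>

/* One generic butterfly body, instantiated per element type by passing the
 * matching add/sub operations as macro arguments. */
#define GEN_BUTTERFLY(name, T, add, sub)                \
  static void name(const T *in, T *out, int n) {        \
    for (int i = 0; i < n / 2; ++i) {                   \
      out[i] = add(in[i], in[i + n / 2]);               \
      out[i + n / 2] = sub(in[i], in[i + n / 2]);       \
    }                                                   \
  }

/* stamp out an SSE variant; a scalar variant would pass wrapper functions */
GEN_BUTTERFLY(butterfly_sse2, __m128, _mm_add_ps, _mm_sub_ps)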
/external/XNNPACK/src/f32-bilinear/ |
D | sse.c.in |
    60  const __m128 vtd${ABC[C:C+4]} = _mm_sub_ps(vtr${ABC[C:C+4]}, vtl${ABC[C:C+4]});
    61  const __m128 vbd${ABC[C:C+4]} = _mm_sub_ps(vbr${ABC[C:C+4]}, vbl${ABC[C:C+4]});
    68  const __m128 vd${ABC[C:C+4]} = _mm_sub_ps(vb${ABC[C:C+4]}, vt${ABC[C:C+4]});
    89  const __m128 vtd0123 = _mm_sub_ps(vtr0123, vtl0123);
    90  const __m128 vbd0123 = _mm_sub_ps(vbr0123, vbl0123);
    95  const __m128 vd0123 = _mm_sub_ps(vb0123, vt0123);
   108  const __m128 vtd0123 = _mm_sub_ps(vtr0123, vtl0123);
   109  const __m128 vbd0123 = _mm_sub_ps(vbr0123, vbl0123);
   114  const __m128 vd0123 = _mm_sub_ps(vb0123, vt0123);
|
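sse.c.in is the codegen template the gen/ files above are produced from: ${ABC[C:C+4]} is a Python-substitution slice of a digit-and-letter alphabet string (assumed to be "0123456789ABCDEF…", the usual XNNPACK convention), so one template line expands to one statement per 4-channel group. For the c8 kernel the loop runs with C = 0 and C = 4, and template line 60 expands to exactly the statements indexed at gen/sse-c8.c lines 58 and 60:

const __m128 vtd0123 = _mm_sub_ps(vtr0123, vtl0123);  /* C = 0 */
const __m128 vtd4567 = _mm_sub_ps(vtr4567, vtl4567);  /* C = 4 */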