Home
last modified time | relevance | path

Searched refs:_mm_max_ps (Results 1 – 25 of 115) sorted by relevance

12345

/external/XNNPACK/src/f32-maxpool/
D9p8x-sse-c4.c97 const __m128 vmax018 = _mm_max_ps(_mm_max_ps(vi0, vi1), vi8); in xnn_f32_maxpool_ukernel_9p8x__sse_c4()
98 const __m128 vmax23 = _mm_max_ps(vi2, vi3); in xnn_f32_maxpool_ukernel_9p8x__sse_c4()
99 const __m128 vmax45 = _mm_max_ps(vi4, vi5); in xnn_f32_maxpool_ukernel_9p8x__sse_c4()
100 const __m128 vmax67 = _mm_max_ps(vi6, vi7); in xnn_f32_maxpool_ukernel_9p8x__sse_c4()
102 const __m128 vmax2345 = _mm_max_ps(vmax23, vmax45); in xnn_f32_maxpool_ukernel_9p8x__sse_c4()
103 const __m128 vmax01678 = _mm_max_ps(vmax018, vmax67); in xnn_f32_maxpool_ukernel_9p8x__sse_c4()
104 const __m128 vmax = _mm_max_ps(vmax2345, vmax01678); in xnn_f32_maxpool_ukernel_9p8x__sse_c4()
105 const __m128 vout = _mm_max_ps(_mm_min_ps(vmax, voutput_max), voutput_min); in xnn_f32_maxpool_ukernel_9p8x__sse_c4()
130 const __m128 vmax018 = _mm_max_ps(_mm_max_ps(vi0, vi1), vi8); in xnn_f32_maxpool_ukernel_9p8x__sse_c4()
131 const __m128 vmax23 = _mm_max_ps(vi2, vi3); in xnn_f32_maxpool_ukernel_9p8x__sse_c4()
[all …]
/external/XNNPACK/src/f32-rmax/
Dsse.c33 vmax0 = _mm_max_ps(vmax0, vx0); in xnn_f32_rmax_ukernel__sse()
34 vmax1 = _mm_max_ps(vmax1, vx1); in xnn_f32_rmax_ukernel__sse()
35 vmax2 = _mm_max_ps(vmax2, vx2); in xnn_f32_rmax_ukernel__sse()
36 vmax3 = _mm_max_ps(vmax3, vx3); in xnn_f32_rmax_ukernel__sse()
38 __m128 vmax = _mm_max_ps(_mm_max_ps(vmax0, vmax1), _mm_max_ps(vmax2, vmax3)); in xnn_f32_rmax_ukernel__sse()
41 vmax = _mm_max_ps(vmax, vx); in xnn_f32_rmax_ukernel__sse()
44 __m128 vmax_lo = _mm_max_ps(vmax, _mm_movehl_ps(vmax, vmax)); in xnn_f32_rmax_ukernel__sse()
/external/XNNPACK/src/f32-argmaxpool/
D9p8x-sse2-c4.c82 vmax = _mm_max_ps(vi1, vmax); in xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4()
86 vmax = _mm_max_ps(vi2, vmax); in xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4()
90 vmax = _mm_max_ps(vi3, vmax); in xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4()
94 vmax = _mm_max_ps(vi4, vmax); in xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4()
98 vmax = _mm_max_ps(vi5, vmax); in xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4()
102 vmax = _mm_max_ps(vi6, vmax); in xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4()
106 vmax = _mm_max_ps(vi7, vmax); in xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4()
110 vmax = _mm_max_ps(vi8, vmax); in xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4()
167 vmax = _mm_max_ps(vi0, vmax); in xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4()
172 vmax = _mm_max_ps(vi1, vmax); in xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4()
[all …]
D9x-sse2-c4.c104 vmax = _mm_max_ps(vi1, vmax); in xnn_f32_argmaxpool_ukernel_9x__sse2_c4()
108 vmax = _mm_max_ps(vi2, vmax); in xnn_f32_argmaxpool_ukernel_9x__sse2_c4()
112 vmax = _mm_max_ps(vi3, vmax); in xnn_f32_argmaxpool_ukernel_9x__sse2_c4()
116 vmax = _mm_max_ps(vi4, vmax); in xnn_f32_argmaxpool_ukernel_9x__sse2_c4()
120 vmax = _mm_max_ps(vi5, vmax); in xnn_f32_argmaxpool_ukernel_9x__sse2_c4()
124 vmax = _mm_max_ps(vi6, vmax); in xnn_f32_argmaxpool_ukernel_9x__sse2_c4()
128 vmax = _mm_max_ps(vi7, vmax); in xnn_f32_argmaxpool_ukernel_9x__sse2_c4()
132 vmax = _mm_max_ps(vi8, vmax); in xnn_f32_argmaxpool_ukernel_9x__sse2_c4()
135 const __m128 vout = _mm_max_ps(_mm_min_ps(vmax, voutput_max), voutput_min); in xnn_f32_argmaxpool_ukernel_9x__sse2_c4()
157 vmax = _mm_max_ps(vi1, vmax); in xnn_f32_argmaxpool_ukernel_9x__sse2_c4()
[all …]
D4x-sse2-c4.c66 vmax = _mm_max_ps(vi1, vmax); in xnn_f32_argmaxpool_ukernel_4x__sse2_c4()
70 vmax = _mm_max_ps(vi2, vmax); in xnn_f32_argmaxpool_ukernel_4x__sse2_c4()
74 vmax = _mm_max_ps(vi3, vmax); in xnn_f32_argmaxpool_ukernel_4x__sse2_c4()
77 const __m128 vout = _mm_max_ps(_mm_min_ps(vmax, voutput_max), voutput_min); in xnn_f32_argmaxpool_ukernel_4x__sse2_c4()
94 vmax = _mm_max_ps(vi1, vmax); in xnn_f32_argmaxpool_ukernel_4x__sse2_c4()
98 vmax = _mm_max_ps(vi2, vmax); in xnn_f32_argmaxpool_ukernel_4x__sse2_c4()
102 vmax = _mm_max_ps(vi3, vmax); in xnn_f32_argmaxpool_ukernel_4x__sse2_c4()
105 __m128 vout = _mm_max_ps(_mm_min_ps(vmax, voutput_max), voutput_min); in xnn_f32_argmaxpool_ukernel_4x__sse2_c4()
/external/XNNPACK/src/f32-vbinary/gen/
Dvmaxc-sse-x8.c37 __m128 vy0123 = _mm_max_ps(va0123, vb); in xnn_f32_vmaxc_ukernel__sse_x8()
38 __m128 vy4567 = _mm_max_ps(va4567, vb); in xnn_f32_vmaxc_ukernel__sse_x8()
40 vy0123 = _mm_max_ps(vy0123, vy_min); in xnn_f32_vmaxc_ukernel__sse_x8()
41 vy4567 = _mm_max_ps(vy4567, vy_min); in xnn_f32_vmaxc_ukernel__sse_x8()
54 __m128 vy0123 = _mm_max_ps(va0123, vb); in xnn_f32_vmaxc_ukernel__sse_x8()
55 vy0123 = _mm_max_ps(vy0123, vy_min); in xnn_f32_vmaxc_ukernel__sse_x8()
63 __m128 vy0123 = _mm_max_ps(va0123, vb); in xnn_f32_vmaxc_ukernel__sse_x8()
64 vy0123 = _mm_max_ps(vy0123, vy_min); in xnn_f32_vmaxc_ukernel__sse_x8()
Dvmax-sse-x8.c40 __m128 vy0123 = _mm_max_ps(va0123, vb0123); in xnn_f32_vmax_ukernel__sse_x8()
41 __m128 vy4567 = _mm_max_ps(va4567, vb4567); in xnn_f32_vmax_ukernel__sse_x8()
43 vy0123 = _mm_max_ps(vy0123, vy_min); in xnn_f32_vmax_ukernel__sse_x8()
44 vy4567 = _mm_max_ps(vy4567, vy_min); in xnn_f32_vmax_ukernel__sse_x8()
60 __m128 vy0123 = _mm_max_ps(va0123, vb0123); in xnn_f32_vmax_ukernel__sse_x8()
61 vy0123 = _mm_max_ps(vy0123, vy_min); in xnn_f32_vmax_ukernel__sse_x8()
70 __m128 vy0123 = _mm_max_ps(va0123, vb0123); in xnn_f32_vmax_ukernel__sse_x8()
71 vy0123 = _mm_max_ps(vy0123, vy_min); in xnn_f32_vmax_ukernel__sse_x8()
Dvmaxc-sse-x4.c36 __m128 vy0123 = _mm_max_ps(va0123, vb); in xnn_f32_vmaxc_ukernel__sse_x4()
38 vy0123 = _mm_max_ps(vy0123, vy_min); in xnn_f32_vmaxc_ukernel__sse_x4()
49 __m128 vy0123 = _mm_max_ps(va0123, vb); in xnn_f32_vmaxc_ukernel__sse_x4()
50 vy0123 = _mm_max_ps(vy0123, vy_min); in xnn_f32_vmaxc_ukernel__sse_x4()
58 __m128 vy0123 = _mm_max_ps(va0123, vb); in xnn_f32_vmaxc_ukernel__sse_x4()
59 vy0123 = _mm_max_ps(vy0123, vy_min); in xnn_f32_vmaxc_ukernel__sse_x4()
Dvmax-sse-x4.c38 __m128 vy0123 = _mm_max_ps(va0123, vb0123); in xnn_f32_vmax_ukernel__sse_x4()
40 vy0123 = _mm_max_ps(vy0123, vy_min); in xnn_f32_vmax_ukernel__sse_x4()
54 __m128 vy0123 = _mm_max_ps(va0123, vb0123); in xnn_f32_vmax_ukernel__sse_x4()
55 vy0123 = _mm_max_ps(vy0123, vy_min); in xnn_f32_vmax_ukernel__sse_x4()
64 __m128 vy0123 = _mm_max_ps(va0123, vb0123); in xnn_f32_vmax_ukernel__sse_x4()
65 vy0123 = _mm_max_ps(vy0123, vy_min); in xnn_f32_vmax_ukernel__sse_x4()
Dvrsubc-sse-x8.c40 vy0123 = _mm_max_ps(vy0123, vy_min); in xnn_f32_vrsubc_ukernel__sse_x8()
41 vy4567 = _mm_max_ps(vy4567, vy_min); in xnn_f32_vrsubc_ukernel__sse_x8()
55 vy0123 = _mm_max_ps(vy0123, vy_min); in xnn_f32_vrsubc_ukernel__sse_x8()
64 vy0123 = _mm_max_ps(vy0123, vy_min); in xnn_f32_vrsubc_ukernel__sse_x8()
Dvmulc-sse-x8.c40 vy0123 = _mm_max_ps(vy0123, vy_min); in xnn_f32_vmulc_ukernel__sse_x8()
41 vy4567 = _mm_max_ps(vy4567, vy_min); in xnn_f32_vmulc_ukernel__sse_x8()
55 vy0123 = _mm_max_ps(vy0123, vy_min); in xnn_f32_vmulc_ukernel__sse_x8()
64 vy0123 = _mm_max_ps(vy0123, vy_min); in xnn_f32_vmulc_ukernel__sse_x8()
Dvsubc-sse-x8.c40 vy0123 = _mm_max_ps(vy0123, vy_min); in xnn_f32_vsubc_ukernel__sse_x8()
41 vy4567 = _mm_max_ps(vy4567, vy_min); in xnn_f32_vsubc_ukernel__sse_x8()
55 vy0123 = _mm_max_ps(vy0123, vy_min); in xnn_f32_vsubc_ukernel__sse_x8()
64 vy0123 = _mm_max_ps(vy0123, vy_min); in xnn_f32_vsubc_ukernel__sse_x8()
Dvminc-sse-x8.c40 vy0123 = _mm_max_ps(vy0123, vy_min); in xnn_f32_vminc_ukernel__sse_x8()
41 vy4567 = _mm_max_ps(vy4567, vy_min); in xnn_f32_vminc_ukernel__sse_x8()
55 vy0123 = _mm_max_ps(vy0123, vy_min); in xnn_f32_vminc_ukernel__sse_x8()
64 vy0123 = _mm_max_ps(vy0123, vy_min); in xnn_f32_vminc_ukernel__sse_x8()
Dvrdivc-sse-x8.c40 vy0123 = _mm_max_ps(vy0123, vy_min); in xnn_f32_vrdivc_ukernel__sse_x8()
41 vy4567 = _mm_max_ps(vy4567, vy_min); in xnn_f32_vrdivc_ukernel__sse_x8()
55 vy0123 = _mm_max_ps(vy0123, vy_min); in xnn_f32_vrdivc_ukernel__sse_x8()
64 vy0123 = _mm_max_ps(vy0123, vy_min); in xnn_f32_vrdivc_ukernel__sse_x8()
Dvdivc-sse-x8.c40 vy0123 = _mm_max_ps(vy0123, vy_min); in xnn_f32_vdivc_ukernel__sse_x8()
41 vy4567 = _mm_max_ps(vy4567, vy_min); in xnn_f32_vdivc_ukernel__sse_x8()
55 vy0123 = _mm_max_ps(vy0123, vy_min); in xnn_f32_vdivc_ukernel__sse_x8()
64 vy0123 = _mm_max_ps(vy0123, vy_min); in xnn_f32_vdivc_ukernel__sse_x8()
Dvaddc-sse-x8.c40 vy0123 = _mm_max_ps(vy0123, vy_min); in xnn_f32_vaddc_ukernel__sse_x8()
41 vy4567 = _mm_max_ps(vy4567, vy_min); in xnn_f32_vaddc_ukernel__sse_x8()
55 vy0123 = _mm_max_ps(vy0123, vy_min); in xnn_f32_vaddc_ukernel__sse_x8()
64 vy0123 = _mm_max_ps(vy0123, vy_min); in xnn_f32_vaddc_ukernel__sse_x8()
Dvmin-sse-x8.c43 vy0123 = _mm_max_ps(vy0123, vy_min); in xnn_f32_vmin_ukernel__sse_x8()
44 vy4567 = _mm_max_ps(vy4567, vy_min); in xnn_f32_vmin_ukernel__sse_x8()
61 vy0123 = _mm_max_ps(vy0123, vy_min); in xnn_f32_vmin_ukernel__sse_x8()
71 vy0123 = _mm_max_ps(vy0123, vy_min); in xnn_f32_vmin_ukernel__sse_x8()
Dvdiv-sse-x8.c43 vy0123 = _mm_max_ps(vy0123, vy_min); in xnn_f32_vdiv_ukernel__sse_x8()
44 vy4567 = _mm_max_ps(vy4567, vy_min); in xnn_f32_vdiv_ukernel__sse_x8()
61 vy0123 = _mm_max_ps(vy0123, vy_min); in xnn_f32_vdiv_ukernel__sse_x8()
71 vy0123 = _mm_max_ps(vy0123, vy_min); in xnn_f32_vdiv_ukernel__sse_x8()
/external/XNNPACK/src/f32-prelu/gen/
Dsse41-2x8.c71 vacc0x0123 = _mm_max_ps(vacc0x0123, vmin); in xnn_f32_prelu_ukernel__sse41_2x8()
72 vacc0x4567 = _mm_max_ps(vacc0x4567, vmin); in xnn_f32_prelu_ukernel__sse41_2x8()
73 vacc1x0123 = _mm_max_ps(vacc1x0123, vmin); in xnn_f32_prelu_ukernel__sse41_2x8()
74 vacc1x4567 = _mm_max_ps(vacc1x4567, vmin); in xnn_f32_prelu_ukernel__sse41_2x8()
103 vacc0x0123 = _mm_max_ps(vacc0x0123, vmin); in xnn_f32_prelu_ukernel__sse41_2x8()
104 vacc1x0123 = _mm_max_ps(vacc1x0123, vmin); in xnn_f32_prelu_ukernel__sse41_2x8()
129 vacc0x0123 = _mm_max_ps(vacc0x0123, vmin); in xnn_f32_prelu_ukernel__sse41_2x8()
130 vacc1x0123 = _mm_max_ps(vacc1x0123, vmin); in xnn_f32_prelu_ukernel__sse41_2x8()
Dsse2-2x8.c75 vacc0x0123 = _mm_max_ps(vacc0x0123, vmin); in xnn_f32_prelu_ukernel__sse2_2x8()
76 vacc0x4567 = _mm_max_ps(vacc0x4567, vmin); in xnn_f32_prelu_ukernel__sse2_2x8()
77 vacc1x0123 = _mm_max_ps(vacc1x0123, vmin); in xnn_f32_prelu_ukernel__sse2_2x8()
78 vacc1x4567 = _mm_max_ps(vacc1x4567, vmin); in xnn_f32_prelu_ukernel__sse2_2x8()
109 vacc0x0123 = _mm_max_ps(vacc0x0123, vmin); in xnn_f32_prelu_ukernel__sse2_2x8()
110 vacc1x0123 = _mm_max_ps(vacc1x0123, vmin); in xnn_f32_prelu_ukernel__sse2_2x8()
137 vacc0x0123 = _mm_max_ps(vacc0x0123, vmin); in xnn_f32_prelu_ukernel__sse2_2x8()
138 vacc1x0123 = _mm_max_ps(vacc1x0123, vmin); in xnn_f32_prelu_ukernel__sse2_2x8()
/external/XNNPACK/src/f32-vmulcaddc/gen/
Dc8-sse-2x.c73 vacc0x0123 = _mm_max_ps(vacc0x0123, vmin); in xnn_f32_vmulcaddc_ukernel_c8__sse_2x()
74 vacc0x4567 = _mm_max_ps(vacc0x4567, vmin); in xnn_f32_vmulcaddc_ukernel_c8__sse_2x()
75 vacc1x0123 = _mm_max_ps(vacc1x0123, vmin); in xnn_f32_vmulcaddc_ukernel_c8__sse_2x()
76 vacc1x4567 = _mm_max_ps(vacc1x4567, vmin); in xnn_f32_vmulcaddc_ukernel_c8__sse_2x()
108 vacc0x0123 = _mm_max_ps(vacc0x0123, vmin); in xnn_f32_vmulcaddc_ukernel_c8__sse_2x()
109 vacc1x0123 = _mm_max_ps(vacc1x0123, vmin); in xnn_f32_vmulcaddc_ukernel_c8__sse_2x()
137 vacc0x0123 = _mm_max_ps(vacc0x0123, vmin); in xnn_f32_vmulcaddc_ukernel_c8__sse_2x()
138 vacc1x0123 = _mm_max_ps(vacc1x0123, vmin); in xnn_f32_vmulcaddc_ukernel_c8__sse_2x()
/external/XNNPACK/src/f32-ppmm/gen/
D4x8-sse.c96 vacc0x0123 = _mm_max_ps(vacc0x0123, vmin); in xnn_f32_ppmm_ukernel_4x8__sse()
97 vacc1x0123 = _mm_max_ps(vacc1x0123, vmin); in xnn_f32_ppmm_ukernel_4x8__sse()
98 vacc2x0123 = _mm_max_ps(vacc2x0123, vmin); in xnn_f32_ppmm_ukernel_4x8__sse()
99 vacc3x0123 = _mm_max_ps(vacc3x0123, vmin); in xnn_f32_ppmm_ukernel_4x8__sse()
100 vacc0x4567 = _mm_max_ps(vacc0x4567, vmin); in xnn_f32_ppmm_ukernel_4x8__sse()
101 vacc1x4567 = _mm_max_ps(vacc1x4567, vmin); in xnn_f32_ppmm_ukernel_4x8__sse()
102 vacc2x4567 = _mm_max_ps(vacc2x4567, vmin); in xnn_f32_ppmm_ukernel_4x8__sse()
103 vacc3x4567 = _mm_max_ps(vacc3x4567, vmin); in xnn_f32_ppmm_ukernel_4x8__sse()
/external/XNNPACK/src/f32-gemm/gen/
D4x8-sse-load1.c108 vacc0x0123 = _mm_max_ps(vacc0x0123, vmin); in xnn_f32_gemm_ukernel_4x8__sse_load1()
109 vacc1x0123 = _mm_max_ps(vacc1x0123, vmin); in xnn_f32_gemm_ukernel_4x8__sse_load1()
110 vacc2x0123 = _mm_max_ps(vacc2x0123, vmin); in xnn_f32_gemm_ukernel_4x8__sse_load1()
111 vacc3x0123 = _mm_max_ps(vacc3x0123, vmin); in xnn_f32_gemm_ukernel_4x8__sse_load1()
112 vacc0x4567 = _mm_max_ps(vacc0x4567, vmin); in xnn_f32_gemm_ukernel_4x8__sse_load1()
113 vacc1x4567 = _mm_max_ps(vacc1x4567, vmin); in xnn_f32_gemm_ukernel_4x8__sse_load1()
114 vacc2x4567 = _mm_max_ps(vacc2x4567, vmin); in xnn_f32_gemm_ukernel_4x8__sse_load1()
115 vacc3x4567 = _mm_max_ps(vacc3x4567, vmin); in xnn_f32_gemm_ukernel_4x8__sse_load1()
/external/XNNPACK/src/f32-igemm/gen/
D4x8-sse-load1.c131 vacc0x0123 = _mm_max_ps(vacc0x0123, vmin); in xnn_f32_igemm_ukernel_4x8__sse_load1()
132 vacc1x0123 = _mm_max_ps(vacc1x0123, vmin); in xnn_f32_igemm_ukernel_4x8__sse_load1()
133 vacc2x0123 = _mm_max_ps(vacc2x0123, vmin); in xnn_f32_igemm_ukernel_4x8__sse_load1()
134 vacc3x0123 = _mm_max_ps(vacc3x0123, vmin); in xnn_f32_igemm_ukernel_4x8__sse_load1()
135 vacc0x4567 = _mm_max_ps(vacc0x4567, vmin); in xnn_f32_igemm_ukernel_4x8__sse_load1()
136 vacc1x4567 = _mm_max_ps(vacc1x4567, vmin); in xnn_f32_igemm_ukernel_4x8__sse_load1()
137 vacc2x4567 = _mm_max_ps(vacc2x4567, vmin); in xnn_f32_igemm_ukernel_4x8__sse_load1()
138 vacc3x4567 = _mm_max_ps(vacc3x4567, vmin); in xnn_f32_igemm_ukernel_4x8__sse_load1()
/external/XNNPACK/src/f32-gemm/gen-inc/
D4x8-sse-load1.c110 vacc0x0123 = _mm_max_ps(vacc0x0123, vmin); in xnn_f32_gemminc_ukernel_4x8__sse_load1()
111 vacc1x0123 = _mm_max_ps(vacc1x0123, vmin); in xnn_f32_gemminc_ukernel_4x8__sse_load1()
112 vacc2x0123 = _mm_max_ps(vacc2x0123, vmin); in xnn_f32_gemminc_ukernel_4x8__sse_load1()
113 vacc3x0123 = _mm_max_ps(vacc3x0123, vmin); in xnn_f32_gemminc_ukernel_4x8__sse_load1()
114 vacc0x4567 = _mm_max_ps(vacc0x4567, vmin); in xnn_f32_gemminc_ukernel_4x8__sse_load1()
115 vacc1x4567 = _mm_max_ps(vacc1x4567, vmin); in xnn_f32_gemminc_ukernel_4x8__sse_load1()
116 vacc2x4567 = _mm_max_ps(vacc2x4567, vmin); in xnn_f32_gemminc_ukernel_4x8__sse_load1()
117 vacc3x4567 = _mm_max_ps(vacc3x4567, vmin); in xnn_f32_gemminc_ukernel_4x8__sse_load1()

12345