/external/XNNPACK/src/f32-maxpool/ |
D | 9p8x-sse-c4.c | 97 const __m128 vmax018 = _mm_max_ps(_mm_max_ps(vi0, vi1), vi8); in xnn_f32_maxpool_ukernel_9p8x__sse_c4() 98 const __m128 vmax23 = _mm_max_ps(vi2, vi3); in xnn_f32_maxpool_ukernel_9p8x__sse_c4() 99 const __m128 vmax45 = _mm_max_ps(vi4, vi5); in xnn_f32_maxpool_ukernel_9p8x__sse_c4() 100 const __m128 vmax67 = _mm_max_ps(vi6, vi7); in xnn_f32_maxpool_ukernel_9p8x__sse_c4() 102 const __m128 vmax2345 = _mm_max_ps(vmax23, vmax45); in xnn_f32_maxpool_ukernel_9p8x__sse_c4() 103 const __m128 vmax01678 = _mm_max_ps(vmax018, vmax67); in xnn_f32_maxpool_ukernel_9p8x__sse_c4() 104 const __m128 vmax = _mm_max_ps(vmax2345, vmax01678); in xnn_f32_maxpool_ukernel_9p8x__sse_c4() 105 const __m128 vout = _mm_max_ps(_mm_min_ps(vmax, voutput_max), voutput_min); in xnn_f32_maxpool_ukernel_9p8x__sse_c4() 130 const __m128 vmax018 = _mm_max_ps(_mm_max_ps(vi0, vi1), vi8); in xnn_f32_maxpool_ukernel_9p8x__sse_c4() 131 const __m128 vmax23 = _mm_max_ps(vi2, vi3); in xnn_f32_maxpool_ukernel_9p8x__sse_c4() [all …]
|
/external/XNNPACK/src/f32-rmax/ |
D | sse.c | 33 vmax0 = _mm_max_ps(vmax0, vx0); in xnn_f32_rmax_ukernel__sse() 34 vmax1 = _mm_max_ps(vmax1, vx1); in xnn_f32_rmax_ukernel__sse() 35 vmax2 = _mm_max_ps(vmax2, vx2); in xnn_f32_rmax_ukernel__sse() 36 vmax3 = _mm_max_ps(vmax3, vx3); in xnn_f32_rmax_ukernel__sse() 38 __m128 vmax = _mm_max_ps(_mm_max_ps(vmax0, vmax1), _mm_max_ps(vmax2, vmax3)); in xnn_f32_rmax_ukernel__sse() 41 vmax = _mm_max_ps(vmax, vx); in xnn_f32_rmax_ukernel__sse() 44 __m128 vmax_lo = _mm_max_ps(vmax, _mm_movehl_ps(vmax, vmax)); in xnn_f32_rmax_ukernel__sse()
|
/external/XNNPACK/src/f32-argmaxpool/ |
D | 9p8x-sse2-c4.c | 82 vmax = _mm_max_ps(vi1, vmax); in xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4() 86 vmax = _mm_max_ps(vi2, vmax); in xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4() 90 vmax = _mm_max_ps(vi3, vmax); in xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4() 94 vmax = _mm_max_ps(vi4, vmax); in xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4() 98 vmax = _mm_max_ps(vi5, vmax); in xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4() 102 vmax = _mm_max_ps(vi6, vmax); in xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4() 106 vmax = _mm_max_ps(vi7, vmax); in xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4() 110 vmax = _mm_max_ps(vi8, vmax); in xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4() 167 vmax = _mm_max_ps(vi0, vmax); in xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4() 172 vmax = _mm_max_ps(vi1, vmax); in xnn_f32_argmaxpool_ukernel_9p8x__sse2_c4() [all …]
|
D | 9x-sse2-c4.c | 104 vmax = _mm_max_ps(vi1, vmax); in xnn_f32_argmaxpool_ukernel_9x__sse2_c4() 108 vmax = _mm_max_ps(vi2, vmax); in xnn_f32_argmaxpool_ukernel_9x__sse2_c4() 112 vmax = _mm_max_ps(vi3, vmax); in xnn_f32_argmaxpool_ukernel_9x__sse2_c4() 116 vmax = _mm_max_ps(vi4, vmax); in xnn_f32_argmaxpool_ukernel_9x__sse2_c4() 120 vmax = _mm_max_ps(vi5, vmax); in xnn_f32_argmaxpool_ukernel_9x__sse2_c4() 124 vmax = _mm_max_ps(vi6, vmax); in xnn_f32_argmaxpool_ukernel_9x__sse2_c4() 128 vmax = _mm_max_ps(vi7, vmax); in xnn_f32_argmaxpool_ukernel_9x__sse2_c4() 132 vmax = _mm_max_ps(vi8, vmax); in xnn_f32_argmaxpool_ukernel_9x__sse2_c4() 135 const __m128 vout = _mm_max_ps(_mm_min_ps(vmax, voutput_max), voutput_min); in xnn_f32_argmaxpool_ukernel_9x__sse2_c4() 157 vmax = _mm_max_ps(vi1, vmax); in xnn_f32_argmaxpool_ukernel_9x__sse2_c4() [all …]
|
D | 4x-sse2-c4.c | 66 vmax = _mm_max_ps(vi1, vmax); in xnn_f32_argmaxpool_ukernel_4x__sse2_c4() 70 vmax = _mm_max_ps(vi2, vmax); in xnn_f32_argmaxpool_ukernel_4x__sse2_c4() 74 vmax = _mm_max_ps(vi3, vmax); in xnn_f32_argmaxpool_ukernel_4x__sse2_c4() 77 const __m128 vout = _mm_max_ps(_mm_min_ps(vmax, voutput_max), voutput_min); in xnn_f32_argmaxpool_ukernel_4x__sse2_c4() 94 vmax = _mm_max_ps(vi1, vmax); in xnn_f32_argmaxpool_ukernel_4x__sse2_c4() 98 vmax = _mm_max_ps(vi2, vmax); in xnn_f32_argmaxpool_ukernel_4x__sse2_c4() 102 vmax = _mm_max_ps(vi3, vmax); in xnn_f32_argmaxpool_ukernel_4x__sse2_c4() 105 __m128 vout = _mm_max_ps(_mm_min_ps(vmax, voutput_max), voutput_min); in xnn_f32_argmaxpool_ukernel_4x__sse2_c4()
|
/external/XNNPACK/src/f32-vbinary/gen/ |
D | vmaxc-sse-x8.c | 37 __m128 vy0123 = _mm_max_ps(va0123, vb); in xnn_f32_vmaxc_ukernel__sse_x8() 38 __m128 vy4567 = _mm_max_ps(va4567, vb); in xnn_f32_vmaxc_ukernel__sse_x8() 40 vy0123 = _mm_max_ps(vy0123, vy_min); in xnn_f32_vmaxc_ukernel__sse_x8() 41 vy4567 = _mm_max_ps(vy4567, vy_min); in xnn_f32_vmaxc_ukernel__sse_x8() 54 __m128 vy0123 = _mm_max_ps(va0123, vb); in xnn_f32_vmaxc_ukernel__sse_x8() 55 vy0123 = _mm_max_ps(vy0123, vy_min); in xnn_f32_vmaxc_ukernel__sse_x8() 63 __m128 vy0123 = _mm_max_ps(va0123, vb); in xnn_f32_vmaxc_ukernel__sse_x8() 64 vy0123 = _mm_max_ps(vy0123, vy_min); in xnn_f32_vmaxc_ukernel__sse_x8()
|
D | vmax-sse-x8.c | 40 __m128 vy0123 = _mm_max_ps(va0123, vb0123); in xnn_f32_vmax_ukernel__sse_x8() 41 __m128 vy4567 = _mm_max_ps(va4567, vb4567); in xnn_f32_vmax_ukernel__sse_x8() 43 vy0123 = _mm_max_ps(vy0123, vy_min); in xnn_f32_vmax_ukernel__sse_x8() 44 vy4567 = _mm_max_ps(vy4567, vy_min); in xnn_f32_vmax_ukernel__sse_x8() 60 __m128 vy0123 = _mm_max_ps(va0123, vb0123); in xnn_f32_vmax_ukernel__sse_x8() 61 vy0123 = _mm_max_ps(vy0123, vy_min); in xnn_f32_vmax_ukernel__sse_x8() 70 __m128 vy0123 = _mm_max_ps(va0123, vb0123); in xnn_f32_vmax_ukernel__sse_x8() 71 vy0123 = _mm_max_ps(vy0123, vy_min); in xnn_f32_vmax_ukernel__sse_x8()
|
D | vmaxc-sse-x4.c | 36 __m128 vy0123 = _mm_max_ps(va0123, vb); in xnn_f32_vmaxc_ukernel__sse_x4() 38 vy0123 = _mm_max_ps(vy0123, vy_min); in xnn_f32_vmaxc_ukernel__sse_x4() 49 __m128 vy0123 = _mm_max_ps(va0123, vb); in xnn_f32_vmaxc_ukernel__sse_x4() 50 vy0123 = _mm_max_ps(vy0123, vy_min); in xnn_f32_vmaxc_ukernel__sse_x4() 58 __m128 vy0123 = _mm_max_ps(va0123, vb); in xnn_f32_vmaxc_ukernel__sse_x4() 59 vy0123 = _mm_max_ps(vy0123, vy_min); in xnn_f32_vmaxc_ukernel__sse_x4()
|
D | vmax-sse-x4.c | 38 __m128 vy0123 = _mm_max_ps(va0123, vb0123); in xnn_f32_vmax_ukernel__sse_x4() 40 vy0123 = _mm_max_ps(vy0123, vy_min); in xnn_f32_vmax_ukernel__sse_x4() 54 __m128 vy0123 = _mm_max_ps(va0123, vb0123); in xnn_f32_vmax_ukernel__sse_x4() 55 vy0123 = _mm_max_ps(vy0123, vy_min); in xnn_f32_vmax_ukernel__sse_x4() 64 __m128 vy0123 = _mm_max_ps(va0123, vb0123); in xnn_f32_vmax_ukernel__sse_x4() 65 vy0123 = _mm_max_ps(vy0123, vy_min); in xnn_f32_vmax_ukernel__sse_x4()
|
D | vrsubc-sse-x8.c | 40 vy0123 = _mm_max_ps(vy0123, vy_min); in xnn_f32_vrsubc_ukernel__sse_x8() 41 vy4567 = _mm_max_ps(vy4567, vy_min); in xnn_f32_vrsubc_ukernel__sse_x8() 55 vy0123 = _mm_max_ps(vy0123, vy_min); in xnn_f32_vrsubc_ukernel__sse_x8() 64 vy0123 = _mm_max_ps(vy0123, vy_min); in xnn_f32_vrsubc_ukernel__sse_x8()
|
D | vmulc-sse-x8.c | 40 vy0123 = _mm_max_ps(vy0123, vy_min); in xnn_f32_vmulc_ukernel__sse_x8() 41 vy4567 = _mm_max_ps(vy4567, vy_min); in xnn_f32_vmulc_ukernel__sse_x8() 55 vy0123 = _mm_max_ps(vy0123, vy_min); in xnn_f32_vmulc_ukernel__sse_x8() 64 vy0123 = _mm_max_ps(vy0123, vy_min); in xnn_f32_vmulc_ukernel__sse_x8()
|
D | vsubc-sse-x8.c | 40 vy0123 = _mm_max_ps(vy0123, vy_min); in xnn_f32_vsubc_ukernel__sse_x8() 41 vy4567 = _mm_max_ps(vy4567, vy_min); in xnn_f32_vsubc_ukernel__sse_x8() 55 vy0123 = _mm_max_ps(vy0123, vy_min); in xnn_f32_vsubc_ukernel__sse_x8() 64 vy0123 = _mm_max_ps(vy0123, vy_min); in xnn_f32_vsubc_ukernel__sse_x8()
|
D | vminc-sse-x8.c | 40 vy0123 = _mm_max_ps(vy0123, vy_min); in xnn_f32_vminc_ukernel__sse_x8() 41 vy4567 = _mm_max_ps(vy4567, vy_min); in xnn_f32_vminc_ukernel__sse_x8() 55 vy0123 = _mm_max_ps(vy0123, vy_min); in xnn_f32_vminc_ukernel__sse_x8() 64 vy0123 = _mm_max_ps(vy0123, vy_min); in xnn_f32_vminc_ukernel__sse_x8()
|
D | vrdivc-sse-x8.c | 40 vy0123 = _mm_max_ps(vy0123, vy_min); in xnn_f32_vrdivc_ukernel__sse_x8() 41 vy4567 = _mm_max_ps(vy4567, vy_min); in xnn_f32_vrdivc_ukernel__sse_x8() 55 vy0123 = _mm_max_ps(vy0123, vy_min); in xnn_f32_vrdivc_ukernel__sse_x8() 64 vy0123 = _mm_max_ps(vy0123, vy_min); in xnn_f32_vrdivc_ukernel__sse_x8()
|
D | vdivc-sse-x8.c | 40 vy0123 = _mm_max_ps(vy0123, vy_min); in xnn_f32_vdivc_ukernel__sse_x8() 41 vy4567 = _mm_max_ps(vy4567, vy_min); in xnn_f32_vdivc_ukernel__sse_x8() 55 vy0123 = _mm_max_ps(vy0123, vy_min); in xnn_f32_vdivc_ukernel__sse_x8() 64 vy0123 = _mm_max_ps(vy0123, vy_min); in xnn_f32_vdivc_ukernel__sse_x8()
|
D | vaddc-sse-x8.c | 40 vy0123 = _mm_max_ps(vy0123, vy_min); in xnn_f32_vaddc_ukernel__sse_x8() 41 vy4567 = _mm_max_ps(vy4567, vy_min); in xnn_f32_vaddc_ukernel__sse_x8() 55 vy0123 = _mm_max_ps(vy0123, vy_min); in xnn_f32_vaddc_ukernel__sse_x8() 64 vy0123 = _mm_max_ps(vy0123, vy_min); in xnn_f32_vaddc_ukernel__sse_x8()
|
D | vmin-sse-x8.c | 43 vy0123 = _mm_max_ps(vy0123, vy_min); in xnn_f32_vmin_ukernel__sse_x8() 44 vy4567 = _mm_max_ps(vy4567, vy_min); in xnn_f32_vmin_ukernel__sse_x8() 61 vy0123 = _mm_max_ps(vy0123, vy_min); in xnn_f32_vmin_ukernel__sse_x8() 71 vy0123 = _mm_max_ps(vy0123, vy_min); in xnn_f32_vmin_ukernel__sse_x8()
|
D | vdiv-sse-x8.c | 43 vy0123 = _mm_max_ps(vy0123, vy_min); in xnn_f32_vdiv_ukernel__sse_x8() 44 vy4567 = _mm_max_ps(vy4567, vy_min); in xnn_f32_vdiv_ukernel__sse_x8() 61 vy0123 = _mm_max_ps(vy0123, vy_min); in xnn_f32_vdiv_ukernel__sse_x8() 71 vy0123 = _mm_max_ps(vy0123, vy_min); in xnn_f32_vdiv_ukernel__sse_x8()
|
/external/XNNPACK/src/f32-prelu/gen/ |
D | sse41-2x8.c | 71 vacc0x0123 = _mm_max_ps(vacc0x0123, vmin); in xnn_f32_prelu_ukernel__sse41_2x8() 72 vacc0x4567 = _mm_max_ps(vacc0x4567, vmin); in xnn_f32_prelu_ukernel__sse41_2x8() 73 vacc1x0123 = _mm_max_ps(vacc1x0123, vmin); in xnn_f32_prelu_ukernel__sse41_2x8() 74 vacc1x4567 = _mm_max_ps(vacc1x4567, vmin); in xnn_f32_prelu_ukernel__sse41_2x8() 103 vacc0x0123 = _mm_max_ps(vacc0x0123, vmin); in xnn_f32_prelu_ukernel__sse41_2x8() 104 vacc1x0123 = _mm_max_ps(vacc1x0123, vmin); in xnn_f32_prelu_ukernel__sse41_2x8() 129 vacc0x0123 = _mm_max_ps(vacc0x0123, vmin); in xnn_f32_prelu_ukernel__sse41_2x8() 130 vacc1x0123 = _mm_max_ps(vacc1x0123, vmin); in xnn_f32_prelu_ukernel__sse41_2x8()
|
D | sse2-2x8.c | 75 vacc0x0123 = _mm_max_ps(vacc0x0123, vmin); in xnn_f32_prelu_ukernel__sse2_2x8() 76 vacc0x4567 = _mm_max_ps(vacc0x4567, vmin); in xnn_f32_prelu_ukernel__sse2_2x8() 77 vacc1x0123 = _mm_max_ps(vacc1x0123, vmin); in xnn_f32_prelu_ukernel__sse2_2x8() 78 vacc1x4567 = _mm_max_ps(vacc1x4567, vmin); in xnn_f32_prelu_ukernel__sse2_2x8() 109 vacc0x0123 = _mm_max_ps(vacc0x0123, vmin); in xnn_f32_prelu_ukernel__sse2_2x8() 110 vacc1x0123 = _mm_max_ps(vacc1x0123, vmin); in xnn_f32_prelu_ukernel__sse2_2x8() 137 vacc0x0123 = _mm_max_ps(vacc0x0123, vmin); in xnn_f32_prelu_ukernel__sse2_2x8() 138 vacc1x0123 = _mm_max_ps(vacc1x0123, vmin); in xnn_f32_prelu_ukernel__sse2_2x8()
|
/external/XNNPACK/src/f32-vmulcaddc/gen/ |
D | c8-sse-2x.c | 73 vacc0x0123 = _mm_max_ps(vacc0x0123, vmin); in xnn_f32_vmulcaddc_ukernel_c8__sse_2x() 74 vacc0x4567 = _mm_max_ps(vacc0x4567, vmin); in xnn_f32_vmulcaddc_ukernel_c8__sse_2x() 75 vacc1x0123 = _mm_max_ps(vacc1x0123, vmin); in xnn_f32_vmulcaddc_ukernel_c8__sse_2x() 76 vacc1x4567 = _mm_max_ps(vacc1x4567, vmin); in xnn_f32_vmulcaddc_ukernel_c8__sse_2x() 108 vacc0x0123 = _mm_max_ps(vacc0x0123, vmin); in xnn_f32_vmulcaddc_ukernel_c8__sse_2x() 109 vacc1x0123 = _mm_max_ps(vacc1x0123, vmin); in xnn_f32_vmulcaddc_ukernel_c8__sse_2x() 137 vacc0x0123 = _mm_max_ps(vacc0x0123, vmin); in xnn_f32_vmulcaddc_ukernel_c8__sse_2x() 138 vacc1x0123 = _mm_max_ps(vacc1x0123, vmin); in xnn_f32_vmulcaddc_ukernel_c8__sse_2x()
|
/external/XNNPACK/src/f32-ppmm/gen/ |
D | 4x8-sse.c | 96 vacc0x0123 = _mm_max_ps(vacc0x0123, vmin); in xnn_f32_ppmm_ukernel_4x8__sse() 97 vacc1x0123 = _mm_max_ps(vacc1x0123, vmin); in xnn_f32_ppmm_ukernel_4x8__sse() 98 vacc2x0123 = _mm_max_ps(vacc2x0123, vmin); in xnn_f32_ppmm_ukernel_4x8__sse() 99 vacc3x0123 = _mm_max_ps(vacc3x0123, vmin); in xnn_f32_ppmm_ukernel_4x8__sse() 100 vacc0x4567 = _mm_max_ps(vacc0x4567, vmin); in xnn_f32_ppmm_ukernel_4x8__sse() 101 vacc1x4567 = _mm_max_ps(vacc1x4567, vmin); in xnn_f32_ppmm_ukernel_4x8__sse() 102 vacc2x4567 = _mm_max_ps(vacc2x4567, vmin); in xnn_f32_ppmm_ukernel_4x8__sse() 103 vacc3x4567 = _mm_max_ps(vacc3x4567, vmin); in xnn_f32_ppmm_ukernel_4x8__sse()
|
/external/XNNPACK/src/f32-gemm/gen/ |
D | 4x8-sse-load1.c | 108 vacc0x0123 = _mm_max_ps(vacc0x0123, vmin); in xnn_f32_gemm_ukernel_4x8__sse_load1() 109 vacc1x0123 = _mm_max_ps(vacc1x0123, vmin); in xnn_f32_gemm_ukernel_4x8__sse_load1() 110 vacc2x0123 = _mm_max_ps(vacc2x0123, vmin); in xnn_f32_gemm_ukernel_4x8__sse_load1() 111 vacc3x0123 = _mm_max_ps(vacc3x0123, vmin); in xnn_f32_gemm_ukernel_4x8__sse_load1() 112 vacc0x4567 = _mm_max_ps(vacc0x4567, vmin); in xnn_f32_gemm_ukernel_4x8__sse_load1() 113 vacc1x4567 = _mm_max_ps(vacc1x4567, vmin); in xnn_f32_gemm_ukernel_4x8__sse_load1() 114 vacc2x4567 = _mm_max_ps(vacc2x4567, vmin); in xnn_f32_gemm_ukernel_4x8__sse_load1() 115 vacc3x4567 = _mm_max_ps(vacc3x4567, vmin); in xnn_f32_gemm_ukernel_4x8__sse_load1()
|
/external/XNNPACK/src/f32-igemm/gen/ |
D | 4x8-sse-load1.c | 131 vacc0x0123 = _mm_max_ps(vacc0x0123, vmin); in xnn_f32_igemm_ukernel_4x8__sse_load1() 132 vacc1x0123 = _mm_max_ps(vacc1x0123, vmin); in xnn_f32_igemm_ukernel_4x8__sse_load1() 133 vacc2x0123 = _mm_max_ps(vacc2x0123, vmin); in xnn_f32_igemm_ukernel_4x8__sse_load1() 134 vacc3x0123 = _mm_max_ps(vacc3x0123, vmin); in xnn_f32_igemm_ukernel_4x8__sse_load1() 135 vacc0x4567 = _mm_max_ps(vacc0x4567, vmin); in xnn_f32_igemm_ukernel_4x8__sse_load1() 136 vacc1x4567 = _mm_max_ps(vacc1x4567, vmin); in xnn_f32_igemm_ukernel_4x8__sse_load1() 137 vacc2x4567 = _mm_max_ps(vacc2x4567, vmin); in xnn_f32_igemm_ukernel_4x8__sse_load1() 138 vacc3x4567 = _mm_max_ps(vacc3x4567, vmin); in xnn_f32_igemm_ukernel_4x8__sse_load1()
|
/external/XNNPACK/src/f32-gemm/gen-inc/ |
D | 4x8-sse-load1.c | 110 vacc0x0123 = _mm_max_ps(vacc0x0123, vmin); in xnn_f32_gemminc_ukernel_4x8__sse_load1() 111 vacc1x0123 = _mm_max_ps(vacc1x0123, vmin); in xnn_f32_gemminc_ukernel_4x8__sse_load1() 112 vacc2x0123 = _mm_max_ps(vacc2x0123, vmin); in xnn_f32_gemminc_ukernel_4x8__sse_load1() 113 vacc3x0123 = _mm_max_ps(vacc3x0123, vmin); in xnn_f32_gemminc_ukernel_4x8__sse_load1() 114 vacc0x4567 = _mm_max_ps(vacc0x4567, vmin); in xnn_f32_gemminc_ukernel_4x8__sse_load1() 115 vacc1x4567 = _mm_max_ps(vacc1x4567, vmin); in xnn_f32_gemminc_ukernel_4x8__sse_load1() 116 vacc2x4567 = _mm_max_ps(vacc2x4567, vmin); in xnn_f32_gemminc_ukernel_4x8__sse_load1() 117 vacc3x4567 = _mm_max_ps(vacc3x4567, vmin); in xnn_f32_gemminc_ukernel_4x8__sse_load1()
|