/external/XNNPACK/src/f32-prelu/gen/ |
D | sse41-2x8.c | 63 const __m128 vacc0x0123 = _mm_blendv_ps(vi0x0123, vprod0x0123, vi0x0123); in xnn_f32_prelu_ukernel__sse41_2x8() 64 const __m128 vacc0x4567 = _mm_blendv_ps(vi0x4567, vprod0x4567, vi0x4567); in xnn_f32_prelu_ukernel__sse41_2x8() 65 const __m128 vacc1x0123 = _mm_blendv_ps(vi1x0123, vprod1x0123, vi1x0123); in xnn_f32_prelu_ukernel__sse41_2x8() 66 const __m128 vacc1x4567 = _mm_blendv_ps(vi1x4567, vprod1x4567, vi1x4567); in xnn_f32_prelu_ukernel__sse41_2x8() 87 __m128 vacc0x0123 = _mm_blendv_ps(vi0x0123, vprod0x0123, vi0x0123); in xnn_f32_prelu_ukernel__sse41_2x8() 88 __m128 vacc1x0123 = _mm_blendv_ps(vi1x0123, vprod1x0123, vi1x0123); in xnn_f32_prelu_ukernel__sse41_2x8() 107 __m128 vacc0x0123 = _mm_blendv_ps(vi0x0123, vprod0x0123, vi0x0123); in xnn_f32_prelu_ukernel__sse41_2x8() 108 __m128 vacc1x0123 = _mm_blendv_ps(vi1x0123, vprod1x0123, vi1x0123); in xnn_f32_prelu_ukernel__sse41_2x8()
|
D | sse41-2x4.c | 58 const __m128 vacc0x0123 = _mm_blendv_ps(vi0x0123, vprod0x0123, vi0x0123); in xnn_f32_prelu_ukernel__sse41_2x4() 59 const __m128 vacc1x0123 = _mm_blendv_ps(vi1x0123, vprod1x0123, vi1x0123); in xnn_f32_prelu_ukernel__sse41_2x4() 78 __m128 vacc0x0123 = _mm_blendv_ps(vi0x0123, vprod0x0123, vi0x0123); in xnn_f32_prelu_ukernel__sse41_2x4() 79 __m128 vacc1x0123 = _mm_blendv_ps(vi1x0123, vprod1x0123, vi1x0123); in xnn_f32_prelu_ukernel__sse41_2x4()
|
/external/XNNPACK/src/f32-vlrelu/gen/ |
D | vlrelu-sse41-x8.c | 36 vacc0123 = _mm_blendv_ps(vx0123, vacc0123, vx0123); in xnn_f32_vlrelu_ukernel__sse41_x8() 37 vacc4567 = _mm_blendv_ps(vx4567, vacc4567, vx4567); in xnn_f32_vlrelu_ukernel__sse41_x8() 48 vacc = _mm_blendv_ps(vx, vacc, vx); in xnn_f32_vlrelu_ukernel__sse41_x8() 57 vacc = _mm_blendv_ps(vx, vacc, vx); in xnn_f32_vlrelu_ukernel__sse41_x8()
|
D | vlrelu-sse41-x4.c | 34 vacc0123 = _mm_blendv_ps(vx0123, vacc0123, vx0123); in xnn_f32_vlrelu_ukernel__sse41_x4() 43 vacc = _mm_blendv_ps(vx, vacc, vx); in xnn_f32_vlrelu_ukernel__sse41_x4()
|
/external/XNNPACK/src/f32-sigmoid/gen/ |
D | sse41-p5-div-x24.c | 152 vf0123 = _mm_blendv_ps(_mm_sub_ps(vone, vf0123), vf0123, vx0123); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x24() 153 vf4567 = _mm_blendv_ps(_mm_sub_ps(vone, vf4567), vf4567, vx4567); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x24() 154 vf89AB = _mm_blendv_ps(_mm_sub_ps(vone, vf89AB), vf89AB, vx89AB); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x24() 155 vfCDEF = _mm_blendv_ps(_mm_sub_ps(vone, vfCDEF), vfCDEF, vxCDEF); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x24() 156 vfGHIJ = _mm_blendv_ps(_mm_sub_ps(vone, vfGHIJ), vfGHIJ, vxGHIJ); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x24() 157 vfKLMN = _mm_blendv_ps(_mm_sub_ps(vone, vfKLMN), vfKLMN, vxKLMN); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x24() 193 vf = _mm_blendv_ps(_mm_sub_ps(vone, vf), vf, vx); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x24() 224 vf = _mm_blendv_ps(_mm_sub_ps(vone, vf), vf, vx); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x24()
|
D | sse41-p5-div-x20.c | 136 vf0123 = _mm_blendv_ps(_mm_sub_ps(vone, vf0123), vf0123, vx0123); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x20() 137 vf4567 = _mm_blendv_ps(_mm_sub_ps(vone, vf4567), vf4567, vx4567); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x20() 138 vf89AB = _mm_blendv_ps(_mm_sub_ps(vone, vf89AB), vf89AB, vx89AB); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x20() 139 vfCDEF = _mm_blendv_ps(_mm_sub_ps(vone, vfCDEF), vfCDEF, vxCDEF); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x20() 140 vfGHIJ = _mm_blendv_ps(_mm_sub_ps(vone, vfGHIJ), vfGHIJ, vxGHIJ); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x20() 175 vf = _mm_blendv_ps(_mm_sub_ps(vone, vf), vf, vx); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x20() 206 vf = _mm_blendv_ps(_mm_sub_ps(vone, vf), vf, vx); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x20()
|
D | sse41-p5-div-x16.c | 120 vf0123 = _mm_blendv_ps(_mm_sub_ps(vone, vf0123), vf0123, vx0123); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x16() 121 vf4567 = _mm_blendv_ps(_mm_sub_ps(vone, vf4567), vf4567, vx4567); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x16() 122 vf89AB = _mm_blendv_ps(_mm_sub_ps(vone, vf89AB), vf89AB, vx89AB); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x16() 123 vfCDEF = _mm_blendv_ps(_mm_sub_ps(vone, vfCDEF), vfCDEF, vxCDEF); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x16() 157 vf = _mm_blendv_ps(_mm_sub_ps(vone, vf), vf, vx); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x16() 188 vf = _mm_blendv_ps(_mm_sub_ps(vone, vf), vf, vx); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x16()
|
D | sse41-p5-div-x12.c | 104 vf0123 = _mm_blendv_ps(_mm_sub_ps(vone, vf0123), vf0123, vx0123); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x12() 105 vf4567 = _mm_blendv_ps(_mm_sub_ps(vone, vf4567), vf4567, vx4567); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x12() 106 vf89AB = _mm_blendv_ps(_mm_sub_ps(vone, vf89AB), vf89AB, vx89AB); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x12() 139 vf = _mm_blendv_ps(_mm_sub_ps(vone, vf), vf, vx); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x12() 170 vf = _mm_blendv_ps(_mm_sub_ps(vone, vf), vf, vx); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x12()
|
D | sse41-p5-div-x8.c | 88 vf0123 = _mm_blendv_ps(_mm_sub_ps(vone, vf0123), vf0123, vx0123); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x8() 89 vf4567 = _mm_blendv_ps(_mm_sub_ps(vone, vf4567), vf4567, vx4567); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x8() 121 vf = _mm_blendv_ps(_mm_sub_ps(vone, vf), vf, vx); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x8() 152 vf = _mm_blendv_ps(_mm_sub_ps(vone, vf), vf, vx); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x8()
|
D | sse41-lut64-p2-div-x24.c | 238 vf0123 = _mm_blendv_ps(_mm_sub_ps(vone, vf0123), vf0123, vx0123); in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x24() 239 vf4567 = _mm_blendv_ps(_mm_sub_ps(vone, vf4567), vf4567, vx4567); in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x24() 240 vf89AB = _mm_blendv_ps(_mm_sub_ps(vone, vf89AB), vf89AB, vx89AB); in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x24() 241 vfCDEF = _mm_blendv_ps(_mm_sub_ps(vone, vfCDEF), vfCDEF, vxCDEF); in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x24() 242 vfGHIJ = _mm_blendv_ps(_mm_sub_ps(vone, vfGHIJ), vfGHIJ, vxGHIJ); in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x24() 243 vfKLMN = _mm_blendv_ps(_mm_sub_ps(vone, vfKLMN), vfKLMN, vxKLMN); in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x24() 291 vf = _mm_blendv_ps(_mm_sub_ps(vone, vf), vf, vx); in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x24() 333 vf = _mm_blendv_ps(_mm_sub_ps(vone, vf), vf, vx); in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x24()
|
D | sse41-lut64-p2-div-x16.c | 178 vf0123 = _mm_blendv_ps(_mm_sub_ps(vone, vf0123), vf0123, vx0123); in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x16() 179 vf4567 = _mm_blendv_ps(_mm_sub_ps(vone, vf4567), vf4567, vx4567); in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x16() 180 vf89AB = _mm_blendv_ps(_mm_sub_ps(vone, vf89AB), vf89AB, vx89AB); in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x16() 181 vfCDEF = _mm_blendv_ps(_mm_sub_ps(vone, vfCDEF), vfCDEF, vxCDEF); in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x16() 227 vf = _mm_blendv_ps(_mm_sub_ps(vone, vf), vf, vx); in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x16() 269 vf = _mm_blendv_ps(_mm_sub_ps(vone, vf), vf, vx); in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x16()
|
D | sse41-lut64-p2-div-x20.c | 208 vf0123 = _mm_blendv_ps(_mm_sub_ps(vone, vf0123), vf0123, vx0123); in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x20() 209 vf4567 = _mm_blendv_ps(_mm_sub_ps(vone, vf4567), vf4567, vx4567); in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x20() 210 vf89AB = _mm_blendv_ps(_mm_sub_ps(vone, vf89AB), vf89AB, vx89AB); in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x20() 211 vfCDEF = _mm_blendv_ps(_mm_sub_ps(vone, vfCDEF), vfCDEF, vxCDEF); in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x20() 212 vfGHIJ = _mm_blendv_ps(_mm_sub_ps(vone, vfGHIJ), vfGHIJ, vxGHIJ); in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x20() 259 vf = _mm_blendv_ps(_mm_sub_ps(vone, vf), vf, vx); in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x20() 301 vf = _mm_blendv_ps(_mm_sub_ps(vone, vf), vf, vx); in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x20()
|
D | sse41-lut64-p2-div-x12.c | 148 vf0123 = _mm_blendv_ps(_mm_sub_ps(vone, vf0123), vf0123, vx0123); in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x12() 149 vf4567 = _mm_blendv_ps(_mm_sub_ps(vone, vf4567), vf4567, vx4567); in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x12() 150 vf89AB = _mm_blendv_ps(_mm_sub_ps(vone, vf89AB), vf89AB, vx89AB); in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x12() 195 vf = _mm_blendv_ps(_mm_sub_ps(vone, vf), vf, vx); in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x12() 237 vf = _mm_blendv_ps(_mm_sub_ps(vone, vf), vf, vx); in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x12()
|
D | sse41-lut64-p2-div-x8.c | 118 vf0123 = _mm_blendv_ps(_mm_sub_ps(vone, vf0123), vf0123, vx0123); in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x8() 119 vf4567 = _mm_blendv_ps(_mm_sub_ps(vone, vf4567), vf4567, vx4567); in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x8() 163 vf = _mm_blendv_ps(_mm_sub_ps(vone, vf), vf, vx); in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x8() 205 vf = _mm_blendv_ps(_mm_sub_ps(vone, vf), vf, vx); in xnn_f32_sigmoid_ukernel__sse41_lut64_p2_div_x8()
|
D | sse41-p5-div-x4.c | 63 vf = _mm_blendv_ps(_mm_sub_ps(vone, vf), vf, vx); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x4() 94 vf = _mm_blendv_ps(_mm_sub_ps(vone, vf), vf, vx); in xnn_f32_sigmoid_ukernel__sse41_p5_div_x4()
|
/external/XNNPACK/src/f32-velu/gen/ |
D | velu-sse41-rr2-p6-x24.c | 165 const __m128 vy0123 = _mm_blendv_ps(vx0123, ve0123, vx0123); in xnn_f32_velu_ukernel__sse41_rr2_p6_x24() 166 const __m128 vy4567 = _mm_blendv_ps(vx4567, ve4567, vx4567); in xnn_f32_velu_ukernel__sse41_rr2_p6_x24() 167 const __m128 vy89AB = _mm_blendv_ps(vx89AB, ve89AB, vx89AB); in xnn_f32_velu_ukernel__sse41_rr2_p6_x24() 168 const __m128 vyCDEF = _mm_blendv_ps(vxCDEF, veCDEF, vxCDEF); in xnn_f32_velu_ukernel__sse41_rr2_p6_x24() 169 const __m128 vyGHIJ = _mm_blendv_ps(vxGHIJ, veGHIJ, vxGHIJ); in xnn_f32_velu_ukernel__sse41_rr2_p6_x24() 170 const __m128 vyKLMN = _mm_blendv_ps(vxKLMN, veKLMN, vxKLMN); in xnn_f32_velu_ukernel__sse41_rr2_p6_x24() 205 const __m128 vy = _mm_blendv_ps(vx, ve, vx); in xnn_f32_velu_ukernel__sse41_rr2_p6_x24() 234 __m128 vy = _mm_blendv_ps(vx, ve, vx); in xnn_f32_velu_ukernel__sse41_rr2_p6_x24()
|
D | velu-sse41-rr2-p6-x20.c | 148 const __m128 vy0123 = _mm_blendv_ps(vx0123, ve0123, vx0123); in xnn_f32_velu_ukernel__sse41_rr2_p6_x20() 149 const __m128 vy4567 = _mm_blendv_ps(vx4567, ve4567, vx4567); in xnn_f32_velu_ukernel__sse41_rr2_p6_x20() 150 const __m128 vy89AB = _mm_blendv_ps(vx89AB, ve89AB, vx89AB); in xnn_f32_velu_ukernel__sse41_rr2_p6_x20() 151 const __m128 vyCDEF = _mm_blendv_ps(vxCDEF, veCDEF, vxCDEF); in xnn_f32_velu_ukernel__sse41_rr2_p6_x20() 152 const __m128 vyGHIJ = _mm_blendv_ps(vxGHIJ, veGHIJ, vxGHIJ); in xnn_f32_velu_ukernel__sse41_rr2_p6_x20() 186 const __m128 vy = _mm_blendv_ps(vx, ve, vx); in xnn_f32_velu_ukernel__sse41_rr2_p6_x20() 215 __m128 vy = _mm_blendv_ps(vx, ve, vx); in xnn_f32_velu_ukernel__sse41_rr2_p6_x20()
|
D | velu-sse41-rr2-p6-x16.c | 131 const __m128 vy0123 = _mm_blendv_ps(vx0123, ve0123, vx0123); in xnn_f32_velu_ukernel__sse41_rr2_p6_x16() 132 const __m128 vy4567 = _mm_blendv_ps(vx4567, ve4567, vx4567); in xnn_f32_velu_ukernel__sse41_rr2_p6_x16() 133 const __m128 vy89AB = _mm_blendv_ps(vx89AB, ve89AB, vx89AB); in xnn_f32_velu_ukernel__sse41_rr2_p6_x16() 134 const __m128 vyCDEF = _mm_blendv_ps(vxCDEF, veCDEF, vxCDEF); in xnn_f32_velu_ukernel__sse41_rr2_p6_x16() 167 const __m128 vy = _mm_blendv_ps(vx, ve, vx); in xnn_f32_velu_ukernel__sse41_rr2_p6_x16() 196 __m128 vy = _mm_blendv_ps(vx, ve, vx); in xnn_f32_velu_ukernel__sse41_rr2_p6_x16()
|
D | velu-sse41-rr2-p6-x12.c | 114 const __m128 vy0123 = _mm_blendv_ps(vx0123, ve0123, vx0123); in xnn_f32_velu_ukernel__sse41_rr2_p6_x12() 115 const __m128 vy4567 = _mm_blendv_ps(vx4567, ve4567, vx4567); in xnn_f32_velu_ukernel__sse41_rr2_p6_x12() 116 const __m128 vy89AB = _mm_blendv_ps(vx89AB, ve89AB, vx89AB); in xnn_f32_velu_ukernel__sse41_rr2_p6_x12() 148 const __m128 vy = _mm_blendv_ps(vx, ve, vx); in xnn_f32_velu_ukernel__sse41_rr2_p6_x12() 177 __m128 vy = _mm_blendv_ps(vx, ve, vx); in xnn_f32_velu_ukernel__sse41_rr2_p6_x12()
|
D | velu-sse41-rr2-p6-x8.c | 97 const __m128 vy0123 = _mm_blendv_ps(vx0123, ve0123, vx0123); in xnn_f32_velu_ukernel__sse41_rr2_p6_x8() 98 const __m128 vy4567 = _mm_blendv_ps(vx4567, ve4567, vx4567); in xnn_f32_velu_ukernel__sse41_rr2_p6_x8() 129 const __m128 vy = _mm_blendv_ps(vx, ve, vx); in xnn_f32_velu_ukernel__sse41_rr2_p6_x8() 158 __m128 vy = _mm_blendv_ps(vx, ve, vx); in xnn_f32_velu_ukernel__sse41_rr2_p6_x8()
|
D | velu-sse41-rr2-lut16-p3-x20.c | 224 const __m128 vy0123 = _mm_blendv_ps(vx0123, ve0123, vx0123); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x20() 225 const __m128 vy4567 = _mm_blendv_ps(vx4567, ve4567, vx4567); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x20() 226 const __m128 vy89AB = _mm_blendv_ps(vx89AB, ve89AB, vx89AB); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x20() 227 const __m128 vyCDEF = _mm_blendv_ps(vxCDEF, veCDEF, vxCDEF); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x20() 228 const __m128 vyGHIJ = _mm_blendv_ps(vxGHIJ, veGHIJ, vxGHIJ); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x20() 276 const __m128 vy = _mm_blendv_ps(vx, ve, vx); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x20() 319 __m128 vy = _mm_blendv_ps(vx, ve, vx); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x20()
|
D | velu-sse41-rr2-lut16-p3-x24.c | 256 const __m128 vy0123 = _mm_blendv_ps(vx0123, ve0123, vx0123); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x24() 257 const __m128 vy4567 = _mm_blendv_ps(vx4567, ve4567, vx4567); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x24() 258 const __m128 vy89AB = _mm_blendv_ps(vx89AB, ve89AB, vx89AB); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x24() 259 const __m128 vyCDEF = _mm_blendv_ps(vxCDEF, veCDEF, vxCDEF); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x24() 260 const __m128 vyGHIJ = _mm_blendv_ps(vxGHIJ, veGHIJ, vxGHIJ); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x24() 261 const __m128 vyKLMN = _mm_blendv_ps(vxKLMN, veKLMN, vxKLMN); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x24() 310 const __m128 vy = _mm_blendv_ps(vx, ve, vx); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x24() 353 __m128 vy = _mm_blendv_ps(vx, ve, vx); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x24()
|
D | velu-sse41-rr2-lut16-p3-x16.c | 192 const __m128 vy0123 = _mm_blendv_ps(vx0123, ve0123, vx0123); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x16() 193 const __m128 vy4567 = _mm_blendv_ps(vx4567, ve4567, vx4567); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x16() 194 const __m128 vy89AB = _mm_blendv_ps(vx89AB, ve89AB, vx89AB); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x16() 195 const __m128 vyCDEF = _mm_blendv_ps(vxCDEF, veCDEF, vxCDEF); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x16() 242 const __m128 vy = _mm_blendv_ps(vx, ve, vx); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x16() 285 __m128 vy = _mm_blendv_ps(vx, ve, vx); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x16()
|
D | velu-sse41-rr2-lut16-p3-x12.c | 160 const __m128 vy0123 = _mm_blendv_ps(vx0123, ve0123, vx0123); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x12() 161 const __m128 vy4567 = _mm_blendv_ps(vx4567, ve4567, vx4567); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x12() 162 const __m128 vy89AB = _mm_blendv_ps(vx89AB, ve89AB, vx89AB); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x12() 208 const __m128 vy = _mm_blendv_ps(vx, ve, vx); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x12() 251 __m128 vy = _mm_blendv_ps(vx, ve, vx); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x12()
|
/external/XNNPACK/src/f32-vlrelu/ |
D | sse.c.in | 58 vacc${ABC[N:N+4]} = _mm_blendv_ps(vx${ABC[N:N+4]}, vacc${ABC[N:N+4]}, vx${ABC[N:N+4]}); 83 vacc = _mm_blendv_ps(vx, vacc, vx); 103 vacc = _mm_blendv_ps(vx, vacc, vx);
|