/external/XNNPACK/src/f32-vsqrt/gen/ |
D | avx512f-nr1fma1adj-x32.c | 34 const __m512 vrsqrtx0 = _mm512_rsqrt14_ps(vx0); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x32() local 37 __m512 vsqrtx0 = _mm512_mul_ps(vrsqrtx0, vx0); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x32() 38 __m512 vhalfrsqrtx0 = _mm512_mul_ps(vrsqrtx0, vhalf); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x32()
|
D | fma3-nr1fma1adj-x16.c | 35 const __m256 vrsqrtx0 = _mm256_rsqrt_ps(vx0); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x16() local 38 __m256 vsqrtx0 = _mm256_mul_ps(vrsqrtx0, vx0); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x16() 39 __m256 vhalfrsqrtx0 = _mm256_mul_ps(vrsqrtx0, vhalf); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x16()
|
D | avx512f-nr1fma1adj-x48.c | 35 const __m512 vrsqrtx0 = _mm512_rsqrt14_ps(vx0); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x48() local 39 __m512 vsqrtx0 = _mm512_mul_ps(vrsqrtx0, vx0); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x48() 40 __m512 vhalfrsqrtx0 = _mm512_mul_ps(vrsqrtx0, vhalf); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x48()
|
D | fma3-nr1fma1adj-x24.c | 36 const __m256 vrsqrtx0 = _mm256_rsqrt_ps(vx0); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x24() local 40 __m256 vsqrtx0 = _mm256_mul_ps(vrsqrtx0, vx0); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x24() 41 __m256 vhalfrsqrtx0 = _mm256_mul_ps(vrsqrtx0, vhalf); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x24()
|
D | avx512f-nr1fma1adj-x64.c | 36 const __m512 vrsqrtx0 = _mm512_rsqrt14_ps(vx0); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x64() local 41 __m512 vsqrtx0 = _mm512_mul_ps(vrsqrtx0, vx0); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x64() 42 __m512 vhalfrsqrtx0 = _mm512_mul_ps(vrsqrtx0, vhalf); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x64()
|
D | fma3-nr1fma1adj-x32.c | 37 const __m256 vrsqrtx0 = _mm256_rsqrt_ps(vx0); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x32() local 42 __m256 vsqrtx0 = _mm256_mul_ps(vrsqrtx0, vx0); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x32() 43 __m256 vhalfrsqrtx0 = _mm256_mul_ps(vrsqrtx0, vhalf); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x32()
|
D | avx512f-nr1fma1adj-x80.c | 37 const __m512 vrsqrtx0 = _mm512_rsqrt14_ps(vx0); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x80() local 43 __m512 vsqrtx0 = _mm512_mul_ps(vrsqrtx0, vx0); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x80() 44 __m512 vhalfrsqrtx0 = _mm512_mul_ps(vrsqrtx0, vhalf); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x80()
|
D | fma3-nr1fma1adj-x40.c | 38 const __m256 vrsqrtx0 = _mm256_rsqrt_ps(vx0); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x40() local 44 __m256 vsqrtx0 = _mm256_mul_ps(vrsqrtx0, vx0); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x40() 45 __m256 vhalfrsqrtx0 = _mm256_mul_ps(vrsqrtx0, vhalf); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x40()
|
D | avx512f-nr1fma1adj-x96.c | 38 const __m512 vrsqrtx0 = _mm512_rsqrt14_ps(vx0); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96() local 45 __m512 vsqrtx0 = _mm512_mul_ps(vrsqrtx0, vx0); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96() 46 __m512 vhalfrsqrtx0 = _mm512_mul_ps(vrsqrtx0, vhalf); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x96()
|
D | fma3-nr1fma1adj-x48.c | 39 const __m256 vrsqrtx0 = _mm256_rsqrt_ps(vx0); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48() local 46 __m256 vsqrtx0 = _mm256_mul_ps(vrsqrtx0, vx0); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48() 47 __m256 vhalfrsqrtx0 = _mm256_mul_ps(vrsqrtx0, vhalf); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x48()
|
D | avx512f-nr1fma1adj-x112.c | 39 const __m512 vrsqrtx0 = _mm512_rsqrt14_ps(vx0); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x112() local 47 __m512 vsqrtx0 = _mm512_mul_ps(vrsqrtx0, vx0); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x112() 48 __m512 vhalfrsqrtx0 = _mm512_mul_ps(vrsqrtx0, vhalf); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x112()
|
D | fma3-nr1fma1adj-x56.c | 40 const __m256 vrsqrtx0 = _mm256_rsqrt_ps(vx0); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56() local 48 __m256 vsqrtx0 = _mm256_mul_ps(vrsqrtx0, vx0); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56() 49 __m256 vhalfrsqrtx0 = _mm256_mul_ps(vrsqrtx0, vhalf); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x56()
|
D | avx512f-nr1fma1adj-x128.c | 40 const __m512 vrsqrtx0 = _mm512_rsqrt14_ps(vx0); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x128() local 49 __m512 vsqrtx0 = _mm512_mul_ps(vrsqrtx0, vx0); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x128() 50 __m512 vhalfrsqrtx0 = _mm512_mul_ps(vrsqrtx0, vhalf); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x128()
|
D | fma3-nr1fma1adj-x64.c | 41 const __m256 vrsqrtx0 = _mm256_rsqrt_ps(vx0); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x64() local 50 __m256 vsqrtx0 = _mm256_mul_ps(vrsqrtx0, vx0); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x64() 51 __m256 vhalfrsqrtx0 = _mm256_mul_ps(vrsqrtx0, vhalf); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x64()
|