/external/XNNPACK/src/f32-vsqrt/ |
D | neonfma-nr2fma1adj.c.in | 68 float32x4_t vresidual = vfmsq_f32(vhalf, vsqrtx, vhalfrsqrtx); variable 83 float32x4_t vresidual = vfmsq_f32(vhalf, vsqrtx, vhalfrsqrtx); variable
|
D | neonfma-nr1rsqrts1fma1adj.c.in | 73 const float32x4_t vresidual = vfmsq_f32(vhalf, vsqrtx, vhalfrsqrtx); variable 88 const float32x4_t vresidual = vfmsq_f32(vhalf, vsqrtx, vhalfrsqrtx); variable
|
D | avx512f-nr1fma1adj.c.in | 68 const __m512 vresidual = _mm512_fnmadd_ps(vsqrtx, vhalfrsqrtx, vhalf); variable 88 const __m512 vresidual = _mm512_fnmadd_ps(vsqrtx, vhalfrsqrtx, vhalf); variable
|
D | fma3-nr1fma1adj.c.in | 67 const __m256 vresidual = _mm256_fnmadd_ps(vsqrtx, vhalfrsqrtx, vhalf); variable 86 const __m256 vresidual = _mm256_fnmadd_ps(vsqrtx, vhalfrsqrtx, vhalf); variable
|
/external/XNNPACK/src/f32-vsqrt/gen/ |
D | neonfma-nr2fma1adj-x4.c | 34 float32x4_t vresidual = vfmsq_f32(vhalf, vsqrtx, vhalfrsqrtx); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x4() local 49 float32x4_t vresidual = vfmsq_f32(vhalf, vsqrtx, vhalfrsqrtx); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x4() local
|
D | avx512f-nr1fma1adj-x16.c | 36 const __m512 vresidual = _mm512_fnmadd_ps(vsqrtx, vhalfrsqrtx, vhalf); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x16() local 56 const __m512 vresidual = _mm512_fnmadd_ps(vsqrtx, vhalfrsqrtx, vhalf); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x16() local
|
D | fma3-nr1fma1adj-x8.c | 35 const __m256 vresidual = _mm256_fnmadd_ps(vsqrtx, vhalfrsqrtx, vhalf); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x8() local 54 const __m256 vresidual = _mm256_fnmadd_ps(vsqrtx, vhalfrsqrtx, vhalf); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x8() local
|
D | neonfma-nr1rsqrts1fma1adj-x4.c | 37 const float32x4_t vresidual = vfmsq_f32(vhalf, vsqrtx, vhalfrsqrtx); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x4() local 52 const float32x4_t vresidual = vfmsq_f32(vhalf, vsqrtx, vhalfrsqrtx); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x4() local
|
D | avx512f-nr1fma1adj-x32.c | 67 const __m512 vresidual = _mm512_fnmadd_ps(vsqrtx, vhalfrsqrtx, vhalf); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x32() local 87 const __m512 vresidual = _mm512_fnmadd_ps(vsqrtx, vhalfrsqrtx, vhalf); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x32() local
|
D | neonfma-nr2fma1adj-x8.c | 71 float32x4_t vresidual = vfmsq_f32(vhalf, vsqrtx, vhalfrsqrtx); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x8() local 86 float32x4_t vresidual = vfmsq_f32(vhalf, vsqrtx, vhalfrsqrtx); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x8() local
|
D | fma3-nr1fma1adj-x16.c | 66 const __m256 vresidual = _mm256_fnmadd_ps(vsqrtx, vhalfrsqrtx, vhalf); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x16() local 85 const __m256 vresidual = _mm256_fnmadd_ps(vsqrtx, vhalfrsqrtx, vhalf); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x16() local
|
D | neonfma-nr1rsqrts1fma1adj-x8.c | 75 const float32x4_t vresidual = vfmsq_f32(vhalf, vsqrtx, vhalfrsqrtx); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x8() local 90 const float32x4_t vresidual = vfmsq_f32(vhalf, vsqrtx, vhalfrsqrtx); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x8() local
|
D | fma3-nr1fma1adj-x24.c | 76 const __m256 vresidual = _mm256_fnmadd_ps(vsqrtx, vhalfrsqrtx, vhalf); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x24() local 95 const __m256 vresidual = _mm256_fnmadd_ps(vsqrtx, vhalfrsqrtx, vhalf); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x24() local
|
D | neonfma-nr2fma1adj-x12.c | 84 float32x4_t vresidual = vfmsq_f32(vhalf, vsqrtx, vhalfrsqrtx); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x12() local 99 float32x4_t vresidual = vfmsq_f32(vhalf, vsqrtx, vhalfrsqrtx); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x12() local
|
D | avx512f-nr1fma1adj-x48.c | 77 const __m512 vresidual = _mm512_fnmadd_ps(vsqrtx, vhalfrsqrtx, vhalf); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x48() local 97 const __m512 vresidual = _mm512_fnmadd_ps(vsqrtx, vhalfrsqrtx, vhalf); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x48() local
|
D | neonfma-nr2fma1adj-x40.c | 82 float32x4_t vresidual = vfmsq_f32(vhalf, vsqrtx, vhalfrsqrtx); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x40() local 175 float32x4_t vresidual = vfmsq_f32(vhalf, vsqrtx, vhalfrsqrtx); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x40() local 190 float32x4_t vresidual = vfmsq_f32(vhalf, vsqrtx, vhalfrsqrtx); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x40() local
|
D | neonfma-nr2fma1adj-x16.c | 97 float32x4_t vresidual = vfmsq_f32(vhalf, vsqrtx, vhalfrsqrtx); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x16() local 112 float32x4_t vresidual = vfmsq_f32(vhalf, vsqrtx, vhalfrsqrtx); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x16() local
|
D | avx512f-nr1fma1adj-x64.c | 87 const __m512 vresidual = _mm512_fnmadd_ps(vsqrtx, vhalfrsqrtx, vhalf); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x64() local 107 const __m512 vresidual = _mm512_fnmadd_ps(vsqrtx, vhalfrsqrtx, vhalf); in xnn_f32_vsqrt_ukernel__avx512f_nr1fma1adj_x64() local
|
D | fma3-nr1fma1adj-x32.c | 86 const __m256 vresidual = _mm256_fnmadd_ps(vsqrtx, vhalfrsqrtx, vhalf); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x32() local 105 const __m256 vresidual = _mm256_fnmadd_ps(vsqrtx, vhalfrsqrtx, vhalf); in xnn_f32_vsqrt_ukernel__fma3_nr1fma1adj_x32() local
|
D | neonfma-nr1rsqrts1fma1adj-x12.c | 88 const float32x4_t vresidual = vfmsq_f32(vhalf, vsqrtx, vhalfrsqrtx); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x12() local 103 const float32x4_t vresidual = vfmsq_f32(vhalf, vsqrtx, vhalfrsqrtx); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x12() local
|
/external/XNNPACK/src/math/ |
D | sqrt-avx512f-nr1fma.c | 35 const __m512 vresidual = _mm512_fnmadd_ps(vsqrtx, vhalfrsqrtx, vhalf); in xnn_math_f32_sqrt__avx512f_nr1fma() local
|
D | sqrt-neonfma-nr3fma.c | 35 float32x4_t vresidual = vfmsq_f32(vhalf, vsqrtx, vhalfrsqrtx); in xnn_math_f32_sqrt__neonfma_nr3fma() local
|
D | sqrt-neonfma-nr2fma.c | 35 float32x4_t vresidual = vfmsq_f32(vhalf, vsqrtx, vhalfrsqrtx); in xnn_math_f32_sqrt__neonfma_nr2fma() local
|
D | sqrt-fma3-nr1fma.c | 35 const __m256 vresidual = _mm256_fnmadd_ps(vsqrtx, vhalfrsqrtx, vhalf); in xnn_math_f32_sqrt__fma3_nr1fma() local
|
D | sqrt-fma3-nr2fma.c | 36 __m256 vresidual = _mm256_fnmadd_ps(vsqrtx, vhalfrsqrtx, vhalf); in xnn_math_f32_sqrt__fma3_nr2fma() local
|