/external/XNNPACK/src/f32-vsqrt/gen/ |
D | neonfma-nr1rsqrts1fma1adj-x32.c | 37 const float32x4_t vxSTUV = vld1q_f32(x); x += 4; in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x32() local 46 float32x4_t vrsqrtxSTUV = vrsqrteq_f32(vxSTUV); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x32() 64 const float32x4_t vcorrectionSTUV = vrsqrtsq_f32(vxSTUV, vrxSTUV); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x32() 89 float32x4_t vsqrtxSTUV = vmulq_f32(vrsqrtxSTUV, vxSTUV); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x32() 125 const float32x4_t vadjustmentSTUV = vfmsq_f32(vxSTUV, vsqrtxSTUV, vsqrtxSTUV); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x32()
|
D | neonfma-nr1rsqrts1fma1adj-x36.c | 37 const float32x4_t vxSTUV = vld1q_f32(x); x += 4; in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x36() local 47 float32x4_t vrsqrtxSTUV = vrsqrteq_f32(vxSTUV); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x36() 67 const float32x4_t vcorrectionSTUV = vrsqrtsq_f32(vxSTUV, vrxSTUV); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x36() 94 float32x4_t vsqrtxSTUV = vmulq_f32(vrsqrtxSTUV, vxSTUV); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x36() 135 const float32x4_t vadjustmentSTUV = vfmsq_f32(vxSTUV, vsqrtxSTUV, vsqrtxSTUV); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x36()
|
D | neonfma-nr1rsqrts1fma1adj-x40.c | 37 const float32x4_t vxSTUV = vld1q_f32(x); x += 4; in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x40() local 48 float32x4_t vrsqrtxSTUV = vrsqrteq_f32(vxSTUV); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x40() 70 const float32x4_t vcorrectionSTUV = vrsqrtsq_f32(vxSTUV, vrxSTUV); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x40() 99 float32x4_t vsqrtxSTUV = vmulq_f32(vrsqrtxSTUV, vxSTUV); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x40() 145 const float32x4_t vadjustmentSTUV = vfmsq_f32(vxSTUV, vsqrtxSTUV, vsqrtxSTUV); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x40()
|
D | neonfma-nr2fma1adj-x32.c | 37 const float32x4_t vxSTUV = vld1q_f32(x); x += 4; in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x32() local 46 const float32x4_t vrsqrtxSTUV = vrsqrteq_f32(vxSTUV); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x32() 62 float32x4_t vsqrtxSTUV = vmulq_f32(vrsqrtxSTUV, vxSTUV); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x32() 124 const float32x4_t vadjustmentSTUV = vfmsq_f32(vxSTUV, vsqrtxSTUV, vsqrtxSTUV); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x32()
|
D | neonfma-nr2fma1adj-x36.c | 37 const float32x4_t vxSTUV = vld1q_f32(x); x += 4; in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x36() local 47 const float32x4_t vrsqrtxSTUV = vrsqrteq_f32(vxSTUV); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x36() 64 float32x4_t vsqrtxSTUV = vmulq_f32(vrsqrtxSTUV, vxSTUV); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x36() 134 const float32x4_t vadjustmentSTUV = vfmsq_f32(vxSTUV, vsqrtxSTUV, vsqrtxSTUV); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x36()
|
D | neonfma-nr2fma1adj-x40.c | 37 const float32x4_t vxSTUV = vld1q_f32(x); x += 4; in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x40() local 48 const float32x4_t vrsqrtxSTUV = vrsqrteq_f32(vxSTUV); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x40() 66 float32x4_t vsqrtxSTUV = vmulq_f32(vrsqrtxSTUV, vxSTUV); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x40() 144 const float32x4_t vadjustmentSTUV = vfmsq_f32(vxSTUV, vsqrtxSTUV, vsqrtxSTUV); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x40()
|
/external/XNNPACK/src/qs8-vaddc/gen/ |
D | minmax-sse41-mul32-ld32-x32.c | 44 const __m128i vxSTUV = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 28)); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x32() local 55 __m128i vaccSTUV = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(vxSTUV, vx_multiplier)); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x32()
|
D | minmax-xop-mul32-ld32-x32.c | 49 const __m128i vxSTUV = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 28)); in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x32() local 60 __m128i vaccSTUV = _mm_macc_epi32(vxSTUV, vx_multiplier, vzero_point_product); in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x32()
|
/external/XNNPACK/src/qs8-vadd/gen/ |
D | minmax-sse41-mul32-ld32-x32.c | 50 const __m128i vxSTUV = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 28)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32() local 62 __m128i vaccSTUV = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(vxSTUV, vx_multiplier)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32()
|
D | minmax-xop-mul32-ld32-x32.c | 55 const __m128i vxSTUV = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 28)); in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x32() local 67 __m128i vaccSTUV = _mm_macc_epi32(vxSTUV, vx_multiplier, vzero_point_product); in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x32()
|