/external/XNNPACK/src/f32-vsqrt/gen/ |
D | neonfma-nr1rsqrts1fma1adj-x28.c | 36 const float32x4_t vxOPQR = vld1q_f32(x); x += 4; in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x28() local 44 float32x4_t vrsqrtxOPQR = vrsqrteq_f32(vxOPQR); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x28() 60 const float32x4_t vcorrectionOPQR = vrsqrtsq_f32(vxOPQR, vrxOPQR); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x28() 82 float32x4_t vsqrtxOPQR = vmulq_f32(vrsqrtxOPQR, vxOPQR); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x28() 114 const float32x4_t vadjustmentOPQR = vfmsq_f32(vxOPQR, vsqrtxOPQR, vsqrtxOPQR); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x28()
|
D | neonfma-nr1rsqrts1fma1adj-x32.c | 36 const float32x4_t vxOPQR = vld1q_f32(x); x += 4; in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x32() local 45 float32x4_t vrsqrtxOPQR = vrsqrteq_f32(vxOPQR); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x32() 63 const float32x4_t vcorrectionOPQR = vrsqrtsq_f32(vxOPQR, vrxOPQR); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x32() 87 float32x4_t vsqrtxOPQR = vmulq_f32(vrsqrtxOPQR, vxOPQR); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x32() 124 const float32x4_t vadjustmentOPQR = vfmsq_f32(vxOPQR, vsqrtxOPQR, vsqrtxOPQR); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x32()
|
D | neonfma-nr1rsqrts1fma1adj-x36.c | 36 const float32x4_t vxOPQR = vld1q_f32(x); x += 4; in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x36() local 46 float32x4_t vrsqrtxOPQR = vrsqrteq_f32(vxOPQR); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x36() 66 const float32x4_t vcorrectionOPQR = vrsqrtsq_f32(vxOPQR, vrxOPQR); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x36() 92 float32x4_t vsqrtxOPQR = vmulq_f32(vrsqrtxOPQR, vxOPQR); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x36() 134 const float32x4_t vadjustmentOPQR = vfmsq_f32(vxOPQR, vsqrtxOPQR, vsqrtxOPQR); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x36()
|
D | neonfma-nr2fma1adj-x28.c | 36 const float32x4_t vxOPQR = vld1q_f32(x); x += 4; in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x28() local 44 const float32x4_t vrsqrtxOPQR = vrsqrteq_f32(vxOPQR); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x28() 58 float32x4_t vsqrtxOPQR = vmulq_f32(vrsqrtxOPQR, vxOPQR); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x28() 113 const float32x4_t vadjustmentOPQR = vfmsq_f32(vxOPQR, vsqrtxOPQR, vsqrtxOPQR); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x28()
|
D | neonfma-nr1rsqrts1fma1adj-x40.c | 36 const float32x4_t vxOPQR = vld1q_f32(x); x += 4; in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x40() local 47 float32x4_t vrsqrtxOPQR = vrsqrteq_f32(vxOPQR); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x40() 69 const float32x4_t vcorrectionOPQR = vrsqrtsq_f32(vxOPQR, vrxOPQR); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x40() 97 float32x4_t vsqrtxOPQR = vmulq_f32(vrsqrtxOPQR, vxOPQR); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x40() 144 const float32x4_t vadjustmentOPQR = vfmsq_f32(vxOPQR, vsqrtxOPQR, vsqrtxOPQR); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x40()
|
D | neonfma-nr2fma1adj-x32.c | 36 const float32x4_t vxOPQR = vld1q_f32(x); x += 4; in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x32() local 45 const float32x4_t vrsqrtxOPQR = vrsqrteq_f32(vxOPQR); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x32() 60 float32x4_t vsqrtxOPQR = vmulq_f32(vrsqrtxOPQR, vxOPQR); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x32() 123 const float32x4_t vadjustmentOPQR = vfmsq_f32(vxOPQR, vsqrtxOPQR, vsqrtxOPQR); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x32()
|
D | neonfma-nr2fma1adj-x36.c | 36 const float32x4_t vxOPQR = vld1q_f32(x); x += 4; in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x36() local 46 const float32x4_t vrsqrtxOPQR = vrsqrteq_f32(vxOPQR); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x36() 62 float32x4_t vsqrtxOPQR = vmulq_f32(vrsqrtxOPQR, vxOPQR); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x36() 133 const float32x4_t vadjustmentOPQR = vfmsq_f32(vxOPQR, vsqrtxOPQR, vsqrtxOPQR); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x36()
|
D | neonfma-nr2fma1adj-x40.c | 36 const float32x4_t vxOPQR = vld1q_f32(x); x += 4; in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x40() local 47 const float32x4_t vrsqrtxOPQR = vrsqrteq_f32(vxOPQR); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x40() 64 float32x4_t vsqrtxOPQR = vmulq_f32(vrsqrtxOPQR, vxOPQR); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x40() 143 const float32x4_t vadjustmentOPQR = vfmsq_f32(vxOPQR, vsqrtxOPQR, vsqrtxOPQR); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x40()
|
/external/XNNPACK/src/qs8-vaddc/gen/ |
D | minmax-sse41-mul32-ld32-x32.c | 43 const __m128i vxOPQR = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 24)); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x32() local 54 __m128i vaccOPQR = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(vxOPQR, vx_multiplier)); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x32()
|
D | minmax-xop-mul32-ld32-x32.c | 48 const __m128i vxOPQR = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 24)); in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x32() local 59 __m128i vaccOPQR = _mm_macc_epi32(vxOPQR, vx_multiplier, vzero_point_product); in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x32()
|
/external/XNNPACK/src/qs8-vadd/gen/ |
D | minmax-sse41-mul32-ld32-x32.c | 48 const __m128i vxOPQR = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 24)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32() local 61 __m128i vaccOPQR = _mm_add_epi32(vzero_point_product, _mm_mullo_epi32(vxOPQR, vx_multiplier)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul32_ld32_x32()
|
D | minmax-xop-mul32-ld32-x32.c | 53 const __m128i vxOPQR = _mm_cvtepi8_epi32(_mm_loadu_si32(input_x + 24)); in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x32() local 66 __m128i vaccOPQR = _mm_macc_epi32(vxOPQR, vx_multiplier, vzero_point_product); in xnn_qs8_vadd_minmax_ukernel__xop_mul32_ld32_x32()
|