1 // Auto-generated file. Do not edit! 2 // Template: src/f32-vsqrt/neon-sqrt.c.in 3 // Generator: tools/xngen 4 // 5 // Copyright 2020 Google LLC 6 // 7 // This source code is licensed under the BSD-style license found in the 8 // LICENSE file in the root directory of this source tree. 9 10 #include <assert.h> 11 #include <math.h> 12 13 #include <arm_neon.h> 14 15 #include <xnnpack/common.h> 16 #include <xnnpack/vunary.h> 17 18 xnn_f32_vsqrt_ukernel__neon_sqrt_x8(size_t n,const float * x,float * y,const union xnn_f32_sqrt_params params[restrict XNN_MIN_ELEMENTS (1)])19void xnn_f32_vsqrt_ukernel__neon_sqrt_x8( 20 size_t n, 21 const float* x, 22 float* y, 23 const union xnn_f32_sqrt_params params[restrict XNN_MIN_ELEMENTS(1)]) XNN_DISABLE_TSAN 24 { 25 assert(n != 0); 26 assert(n % sizeof(float) == 0); 27 28 for (; n >= 8 * sizeof(float); n -= 8 * sizeof(float)) { 29 const float32x4_t vx0123 = vld1q_f32(x); x += 4; 30 const float32x4_t vx4567 = vld1q_f32(x); x += 4; 31 32 const float32x4_t vy0123 = vsqrtq_f32(vx0123); 33 const float32x4_t vy4567 = vsqrtq_f32(vx4567); 34 35 vst1q_f32(y, vy0123); y += 4; 36 vst1q_f32(y, vy4567); y += 4; 37 } 38 for (; n >= 4 * sizeof(float); n -= 4 * sizeof(float)) { 39 const float32x4_t vx = vld1q_f32(x); x += 4; 40 const float32x4_t vy = vsqrtq_f32(vx); 41 vst1q_f32(y, vy); y += 4; 42 } 43 if XNN_UNLIKELY(n != 0) { 44 do { 45 const float vx = *x++; 46 const float vy = sqrtf(vx); 47 *y++ = vy; 48 n -= sizeof(float); 49 } while (n != 0); 50 } 51 } 52