/external/XNNPACK/src/f32-vsqrt/gen/ |
D | neonfma-nr2fma1adj-x8.c | 39 float32x4_t vhalfrsqrtx4567 = vmulq_f32(vrsqrtx4567, vhalf); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x8() local 42 float32x4_t vresidual4567 = vfmsq_f32(vhalf, vsqrtx4567, vhalfrsqrtx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x8() 46 vhalfrsqrtx4567 = vfmaq_f32(vhalfrsqrtx4567, vresidual4567, vhalfrsqrtx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x8() 50 vresidual4567 = vfmsq_f32(vhalf, vsqrtx4567, vhalfrsqrtx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x8() 54 vhalfrsqrtx4567 = vfmaq_f32(vhalfrsqrtx4567, vresidual4567, vhalfrsqrtx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x8() 61 const float32x4_t vy4567 = vfmaq_f32(vsqrtx4567, vhalfrsqrtx4567, vadjustment4567); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x8()
|
D | neonfma-nr2fma1adj-x12.c | 41 float32x4_t vhalfrsqrtx4567 = vmulq_f32(vrsqrtx4567, vhalf); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x12() local 46 float32x4_t vresidual4567 = vfmsq_f32(vhalf, vsqrtx4567, vhalfrsqrtx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x12() 51 vhalfrsqrtx4567 = vfmaq_f32(vhalfrsqrtx4567, vresidual4567, vhalfrsqrtx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x12() 57 vresidual4567 = vfmsq_f32(vhalf, vsqrtx4567, vhalfrsqrtx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x12() 62 vhalfrsqrtx4567 = vfmaq_f32(vhalfrsqrtx4567, vresidual4567, vhalfrsqrtx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x12() 72 const float32x4_t vy4567 = vfmaq_f32(vsqrtx4567, vhalfrsqrtx4567, vadjustment4567); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x12()
|
D | neonfma-nr2fma1adj-x16.c | 43 float32x4_t vhalfrsqrtx4567 = vmulq_f32(vrsqrtx4567, vhalf); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x16() local 50 float32x4_t vresidual4567 = vfmsq_f32(vhalf, vsqrtx4567, vhalfrsqrtx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x16() 56 vhalfrsqrtx4567 = vfmaq_f32(vhalfrsqrtx4567, vresidual4567, vhalfrsqrtx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x16() 64 vresidual4567 = vfmsq_f32(vhalf, vsqrtx4567, vhalfrsqrtx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x16() 70 vhalfrsqrtx4567 = vfmaq_f32(vhalfrsqrtx4567, vresidual4567, vhalfrsqrtx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x16() 83 const float32x4_t vy4567 = vfmaq_f32(vsqrtx4567, vhalfrsqrtx4567, vadjustment4567); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x16()
|
D | neonfma-nr2fma1adj-x20.c | 45 float32x4_t vhalfrsqrtx4567 = vmulq_f32(vrsqrtx4567, vhalf); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x20() local 54 float32x4_t vresidual4567 = vfmsq_f32(vhalf, vsqrtx4567, vhalfrsqrtx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x20() 61 vhalfrsqrtx4567 = vfmaq_f32(vhalfrsqrtx4567, vresidual4567, vhalfrsqrtx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x20() 71 vresidual4567 = vfmsq_f32(vhalf, vsqrtx4567, vhalfrsqrtx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x20() 78 vhalfrsqrtx4567 = vfmaq_f32(vhalfrsqrtx4567, vresidual4567, vhalfrsqrtx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x20() 94 const float32x4_t vy4567 = vfmaq_f32(vsqrtx4567, vhalfrsqrtx4567, vadjustment4567); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x20()
|
D | neonfma-nr1rsqrts1fma1adj-x8.c | 48 float32x4_t vhalfrsqrtx4567 = vmulq_f32(vrsqrtx4567, vhalf); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x8() local 51 const float32x4_t vresidual4567 = vfmsq_f32(vhalf, vsqrtx4567, vhalfrsqrtx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x8() 55 vhalfrsqrtx4567 = vfmaq_f32(vhalfrsqrtx4567, vresidual4567, vhalfrsqrtx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x8() 62 const float32x4_t vy4567 = vfmaq_f32(vsqrtx4567, vhalfrsqrtx4567, vadjustment4567); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x8()
|
D | neonfma-nr2fma1adj-x24.c | 47 float32x4_t vhalfrsqrtx4567 = vmulq_f32(vrsqrtx4567, vhalf); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x24() local 58 float32x4_t vresidual4567 = vfmsq_f32(vhalf, vsqrtx4567, vhalfrsqrtx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x24() 66 vhalfrsqrtx4567 = vfmaq_f32(vhalfrsqrtx4567, vresidual4567, vhalfrsqrtx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x24() 78 vresidual4567 = vfmsq_f32(vhalf, vsqrtx4567, vhalfrsqrtx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x24() 86 vhalfrsqrtx4567 = vfmaq_f32(vhalfrsqrtx4567, vresidual4567, vhalfrsqrtx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x24() 105 const float32x4_t vy4567 = vfmaq_f32(vsqrtx4567, vhalfrsqrtx4567, vadjustment4567); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x24()
|
D | neonfma-nr2fma1adj-x28.c | 49 float32x4_t vhalfrsqrtx4567 = vmulq_f32(vrsqrtx4567, vhalf); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x28() local 62 float32x4_t vresidual4567 = vfmsq_f32(vhalf, vsqrtx4567, vhalfrsqrtx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x28() 71 vhalfrsqrtx4567 = vfmaq_f32(vhalfrsqrtx4567, vresidual4567, vhalfrsqrtx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x28() 85 vresidual4567 = vfmsq_f32(vhalf, vsqrtx4567, vhalfrsqrtx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x28() 94 vhalfrsqrtx4567 = vfmaq_f32(vhalfrsqrtx4567, vresidual4567, vhalfrsqrtx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x28() 116 const float32x4_t vy4567 = vfmaq_f32(vsqrtx4567, vhalfrsqrtx4567, vadjustment4567); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x28()
|
D | neonfma-nr1rsqrts1fma1adj-x12.c | 53 float32x4_t vhalfrsqrtx4567 = vmulq_f32(vrsqrtx4567, vhalf); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x12() local 58 const float32x4_t vresidual4567 = vfmsq_f32(vhalf, vsqrtx4567, vhalfrsqrtx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x12() 63 vhalfrsqrtx4567 = vfmaq_f32(vhalfrsqrtx4567, vresidual4567, vhalfrsqrtx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x12() 73 const float32x4_t vy4567 = vfmaq_f32(vsqrtx4567, vhalfrsqrtx4567, vadjustment4567); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x12()
|
D | neonfma-nr2fma1adj-x32.c | 51 float32x4_t vhalfrsqrtx4567 = vmulq_f32(vrsqrtx4567, vhalf); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x32() local 66 float32x4_t vresidual4567 = vfmsq_f32(vhalf, vsqrtx4567, vhalfrsqrtx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x32() 76 vhalfrsqrtx4567 = vfmaq_f32(vhalfrsqrtx4567, vresidual4567, vhalfrsqrtx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x32() 92 vresidual4567 = vfmsq_f32(vhalf, vsqrtx4567, vhalfrsqrtx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x32() 102 vhalfrsqrtx4567 = vfmaq_f32(vhalfrsqrtx4567, vresidual4567, vhalfrsqrtx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x32() 127 const float32x4_t vy4567 = vfmaq_f32(vsqrtx4567, vhalfrsqrtx4567, vadjustment4567); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x32()
|
D | neonfma-nr2fma1adj-x36.c | 53 float32x4_t vhalfrsqrtx4567 = vmulq_f32(vrsqrtx4567, vhalf); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x36() local 70 float32x4_t vresidual4567 = vfmsq_f32(vhalf, vsqrtx4567, vhalfrsqrtx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x36() 81 vhalfrsqrtx4567 = vfmaq_f32(vhalfrsqrtx4567, vresidual4567, vhalfrsqrtx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x36() 99 vresidual4567 = vfmsq_f32(vhalf, vsqrtx4567, vhalfrsqrtx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x36() 110 vhalfrsqrtx4567 = vfmaq_f32(vhalfrsqrtx4567, vresidual4567, vhalfrsqrtx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x36() 138 const float32x4_t vy4567 = vfmaq_f32(vsqrtx4567, vhalfrsqrtx4567, vadjustment4567); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x36()
|
D | neonfma-nr1rsqrts1fma1adj-x16.c | 58 float32x4_t vhalfrsqrtx4567 = vmulq_f32(vrsqrtx4567, vhalf); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x16() local 65 const float32x4_t vresidual4567 = vfmsq_f32(vhalf, vsqrtx4567, vhalfrsqrtx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x16() 71 vhalfrsqrtx4567 = vfmaq_f32(vhalfrsqrtx4567, vresidual4567, vhalfrsqrtx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x16() 84 const float32x4_t vy4567 = vfmaq_f32(vsqrtx4567, vhalfrsqrtx4567, vadjustment4567); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x16()
|
D | neonfma-nr2fma1adj-x40.c | 55 float32x4_t vhalfrsqrtx4567 = vmulq_f32(vrsqrtx4567, vhalf); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x40() local 74 float32x4_t vresidual4567 = vfmsq_f32(vhalf, vsqrtx4567, vhalfrsqrtx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x40() 86 vhalfrsqrtx4567 = vfmaq_f32(vhalfrsqrtx4567, vresidual4567, vhalfrsqrtx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x40() 106 vresidual4567 = vfmsq_f32(vhalf, vsqrtx4567, vhalfrsqrtx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x40() 118 vhalfrsqrtx4567 = vfmaq_f32(vhalfrsqrtx4567, vresidual4567, vhalfrsqrtx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x40() 149 const float32x4_t vy4567 = vfmaq_f32(vsqrtx4567, vhalfrsqrtx4567, vadjustment4567); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x40()
|
D | neonfma-nr1rsqrts1fma1adj-x20.c | 63 float32x4_t vhalfrsqrtx4567 = vmulq_f32(vrsqrtx4567, vhalf); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x20() local 72 const float32x4_t vresidual4567 = vfmsq_f32(vhalf, vsqrtx4567, vhalfrsqrtx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x20() 79 vhalfrsqrtx4567 = vfmaq_f32(vhalfrsqrtx4567, vresidual4567, vhalfrsqrtx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x20() 95 const float32x4_t vy4567 = vfmaq_f32(vsqrtx4567, vhalfrsqrtx4567, vadjustment4567); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x20()
|
D | neonfma-nr1rsqrts1fma1adj-x24.c | 68 float32x4_t vhalfrsqrtx4567 = vmulq_f32(vrsqrtx4567, vhalf); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x24() local 79 const float32x4_t vresidual4567 = vfmsq_f32(vhalf, vsqrtx4567, vhalfrsqrtx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x24() 87 vhalfrsqrtx4567 = vfmaq_f32(vhalfrsqrtx4567, vresidual4567, vhalfrsqrtx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x24() 106 const float32x4_t vy4567 = vfmaq_f32(vsqrtx4567, vhalfrsqrtx4567, vadjustment4567); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x24()
|
D | neonfma-nr1rsqrts1fma1adj-x28.c | 73 float32x4_t vhalfrsqrtx4567 = vmulq_f32(vrsqrtx4567, vhalf); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x28() local 86 const float32x4_t vresidual4567 = vfmsq_f32(vhalf, vsqrtx4567, vhalfrsqrtx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x28() 95 vhalfrsqrtx4567 = vfmaq_f32(vhalfrsqrtx4567, vresidual4567, vhalfrsqrtx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x28() 117 const float32x4_t vy4567 = vfmaq_f32(vsqrtx4567, vhalfrsqrtx4567, vadjustment4567); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x28()
|
D | neonfma-nr1rsqrts1fma1adj-x32.c | 78 float32x4_t vhalfrsqrtx4567 = vmulq_f32(vrsqrtx4567, vhalf); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x32() local 93 const float32x4_t vresidual4567 = vfmsq_f32(vhalf, vsqrtx4567, vhalfrsqrtx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x32() 103 vhalfrsqrtx4567 = vfmaq_f32(vhalfrsqrtx4567, vresidual4567, vhalfrsqrtx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x32() 128 const float32x4_t vy4567 = vfmaq_f32(vsqrtx4567, vhalfrsqrtx4567, vadjustment4567); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x32()
|
D | neonfma-nr1rsqrts1fma1adj-x36.c | 83 float32x4_t vhalfrsqrtx4567 = vmulq_f32(vrsqrtx4567, vhalf); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x36() local 100 const float32x4_t vresidual4567 = vfmsq_f32(vhalf, vsqrtx4567, vhalfrsqrtx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x36() 111 vhalfrsqrtx4567 = vfmaq_f32(vhalfrsqrtx4567, vresidual4567, vhalfrsqrtx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x36() 139 const float32x4_t vy4567 = vfmaq_f32(vsqrtx4567, vhalfrsqrtx4567, vadjustment4567); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x36()
|
D | neonfma-nr1rsqrts1fma1adj-x40.c | 88 float32x4_t vhalfrsqrtx4567 = vmulq_f32(vrsqrtx4567, vhalf); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x40() local 107 const float32x4_t vresidual4567 = vfmsq_f32(vhalf, vsqrtx4567, vhalfrsqrtx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x40() 119 vhalfrsqrtx4567 = vfmaq_f32(vhalfrsqrtx4567, vresidual4567, vhalfrsqrtx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x40() 150 const float32x4_t vy4567 = vfmaq_f32(vsqrtx4567, vhalfrsqrtx4567, vadjustment4567); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x40()
|