/external/XNNPACK/src/f32-vsqrt/gen/ |
D | neonfma-nr2fma1adj-x8.c | 38 float32x4_t vsqrtx4567 = vmulq_f32(vrsqrtx4567, vx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x8() local 42 float32x4_t vresidual4567 = vfmsq_f32(vhalf, vsqrtx4567, vhalfrsqrtx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x8() 47 vsqrtx4567 = vfmaq_f32(vsqrtx4567, vresidual4567, vsqrtx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x8() 50 vresidual4567 = vfmsq_f32(vhalf, vsqrtx4567, vhalfrsqrtx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x8() 55 vsqrtx4567 = vfmaq_f32(vsqrtx4567, vresidual4567, vsqrtx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x8() 58 const float32x4_t vadjustment4567 = vfmsq_f32(vx4567, vsqrtx4567, vsqrtx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x8() 61 const float32x4_t vy4567 = vfmaq_f32(vsqrtx4567, vhalfrsqrtx4567, vadjustment4567); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x8()
|
D | neonfma-nr2fma1adj-x12.c | 40 float32x4_t vsqrtx4567 = vmulq_f32(vrsqrtx4567, vx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x12() local 46 float32x4_t vresidual4567 = vfmsq_f32(vhalf, vsqrtx4567, vhalfrsqrtx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x12() 52 vsqrtx4567 = vfmaq_f32(vsqrtx4567, vresidual4567, vsqrtx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x12() 57 vresidual4567 = vfmsq_f32(vhalf, vsqrtx4567, vhalfrsqrtx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x12() 63 vsqrtx4567 = vfmaq_f32(vsqrtx4567, vresidual4567, vsqrtx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x12() 68 const float32x4_t vadjustment4567 = vfmsq_f32(vx4567, vsqrtx4567, vsqrtx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x12() 72 const float32x4_t vy4567 = vfmaq_f32(vsqrtx4567, vhalfrsqrtx4567, vadjustment4567); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x12()
|
D | neonfma-nr2fma1adj-x16.c | 42 float32x4_t vsqrtx4567 = vmulq_f32(vrsqrtx4567, vx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x16() local 50 float32x4_t vresidual4567 = vfmsq_f32(vhalf, vsqrtx4567, vhalfrsqrtx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x16() 57 vsqrtx4567 = vfmaq_f32(vsqrtx4567, vresidual4567, vsqrtx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x16() 64 vresidual4567 = vfmsq_f32(vhalf, vsqrtx4567, vhalfrsqrtx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x16() 71 vsqrtx4567 = vfmaq_f32(vsqrtx4567, vresidual4567, vsqrtx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x16() 78 const float32x4_t vadjustment4567 = vfmsq_f32(vx4567, vsqrtx4567, vsqrtx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x16() 83 const float32x4_t vy4567 = vfmaq_f32(vsqrtx4567, vhalfrsqrtx4567, vadjustment4567); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x16()
|
D | neonfma-nr1rsqrts1fma1adj-x8.c | 47 float32x4_t vsqrtx4567 = vmulq_f32(vrsqrtx4567, vx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x8() local 51 const float32x4_t vresidual4567 = vfmsq_f32(vhalf, vsqrtx4567, vhalfrsqrtx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x8() 56 vsqrtx4567 = vfmaq_f32(vsqrtx4567, vresidual4567, vsqrtx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x8() 59 const float32x4_t vadjustment4567 = vfmsq_f32(vx4567, vsqrtx4567, vsqrtx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x8() 62 const float32x4_t vy4567 = vfmaq_f32(vsqrtx4567, vhalfrsqrtx4567, vadjustment4567); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x8()
|
D | neonfma-nr2fma1adj-x20.c | 44 float32x4_t vsqrtx4567 = vmulq_f32(vrsqrtx4567, vx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x20() local 54 float32x4_t vresidual4567 = vfmsq_f32(vhalf, vsqrtx4567, vhalfrsqrtx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x20() 62 vsqrtx4567 = vfmaq_f32(vsqrtx4567, vresidual4567, vsqrtx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x20() 71 vresidual4567 = vfmsq_f32(vhalf, vsqrtx4567, vhalfrsqrtx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x20() 79 vsqrtx4567 = vfmaq_f32(vsqrtx4567, vresidual4567, vsqrtx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x20() 88 const float32x4_t vadjustment4567 = vfmsq_f32(vx4567, vsqrtx4567, vsqrtx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x20() 94 const float32x4_t vy4567 = vfmaq_f32(vsqrtx4567, vhalfrsqrtx4567, vadjustment4567); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x20()
|
D | neonfma-nr2fma1adj-x24.c | 46 float32x4_t vsqrtx4567 = vmulq_f32(vrsqrtx4567, vx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x24() local 58 float32x4_t vresidual4567 = vfmsq_f32(vhalf, vsqrtx4567, vhalfrsqrtx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x24() 67 vsqrtx4567 = vfmaq_f32(vsqrtx4567, vresidual4567, vsqrtx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x24() 78 vresidual4567 = vfmsq_f32(vhalf, vsqrtx4567, vhalfrsqrtx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x24() 87 vsqrtx4567 = vfmaq_f32(vsqrtx4567, vresidual4567, vsqrtx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x24() 98 const float32x4_t vadjustment4567 = vfmsq_f32(vx4567, vsqrtx4567, vsqrtx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x24() 105 const float32x4_t vy4567 = vfmaq_f32(vsqrtx4567, vhalfrsqrtx4567, vadjustment4567); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x24()
|
D | neonfma-nr1rsqrts1fma1adj-x12.c | 52 float32x4_t vsqrtx4567 = vmulq_f32(vrsqrtx4567, vx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x12() local 58 const float32x4_t vresidual4567 = vfmsq_f32(vhalf, vsqrtx4567, vhalfrsqrtx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x12() 64 vsqrtx4567 = vfmaq_f32(vsqrtx4567, vresidual4567, vsqrtx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x12() 69 const float32x4_t vadjustment4567 = vfmsq_f32(vx4567, vsqrtx4567, vsqrtx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x12() 73 const float32x4_t vy4567 = vfmaq_f32(vsqrtx4567, vhalfrsqrtx4567, vadjustment4567); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x12()
|
D | neonfma-nr2fma1adj-x28.c | 48 float32x4_t vsqrtx4567 = vmulq_f32(vrsqrtx4567, vx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x28() local 62 float32x4_t vresidual4567 = vfmsq_f32(vhalf, vsqrtx4567, vhalfrsqrtx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x28() 72 vsqrtx4567 = vfmaq_f32(vsqrtx4567, vresidual4567, vsqrtx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x28() 85 vresidual4567 = vfmsq_f32(vhalf, vsqrtx4567, vhalfrsqrtx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x28() 95 vsqrtx4567 = vfmaq_f32(vsqrtx4567, vresidual4567, vsqrtx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x28() 108 const float32x4_t vadjustment4567 = vfmsq_f32(vx4567, vsqrtx4567, vsqrtx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x28() 116 const float32x4_t vy4567 = vfmaq_f32(vsqrtx4567, vhalfrsqrtx4567, vadjustment4567); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x28()
|
D | neonfma-nr1rsqrts1fma1adj-x16.c | 57 float32x4_t vsqrtx4567 = vmulq_f32(vrsqrtx4567, vx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x16() local 65 const float32x4_t vresidual4567 = vfmsq_f32(vhalf, vsqrtx4567, vhalfrsqrtx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x16() 72 vsqrtx4567 = vfmaq_f32(vsqrtx4567, vresidual4567, vsqrtx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x16() 79 const float32x4_t vadjustment4567 = vfmsq_f32(vx4567, vsqrtx4567, vsqrtx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x16() 84 const float32x4_t vy4567 = vfmaq_f32(vsqrtx4567, vhalfrsqrtx4567, vadjustment4567); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x16()
|
D | neonfma-nr2fma1adj-x32.c | 50 float32x4_t vsqrtx4567 = vmulq_f32(vrsqrtx4567, vx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x32() local 66 float32x4_t vresidual4567 = vfmsq_f32(vhalf, vsqrtx4567, vhalfrsqrtx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x32() 77 vsqrtx4567 = vfmaq_f32(vsqrtx4567, vresidual4567, vsqrtx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x32() 92 vresidual4567 = vfmsq_f32(vhalf, vsqrtx4567, vhalfrsqrtx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x32() 103 vsqrtx4567 = vfmaq_f32(vsqrtx4567, vresidual4567, vsqrtx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x32() 118 const float32x4_t vadjustment4567 = vfmsq_f32(vx4567, vsqrtx4567, vsqrtx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x32() 127 const float32x4_t vy4567 = vfmaq_f32(vsqrtx4567, vhalfrsqrtx4567, vadjustment4567); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x32()
|
D | neonfma-nr2fma1adj-x36.c | 52 float32x4_t vsqrtx4567 = vmulq_f32(vrsqrtx4567, vx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x36() local 70 float32x4_t vresidual4567 = vfmsq_f32(vhalf, vsqrtx4567, vhalfrsqrtx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x36() 82 vsqrtx4567 = vfmaq_f32(vsqrtx4567, vresidual4567, vsqrtx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x36() 99 vresidual4567 = vfmsq_f32(vhalf, vsqrtx4567, vhalfrsqrtx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x36() 111 vsqrtx4567 = vfmaq_f32(vsqrtx4567, vresidual4567, vsqrtx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x36() 128 const float32x4_t vadjustment4567 = vfmsq_f32(vx4567, vsqrtx4567, vsqrtx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x36() 138 const float32x4_t vy4567 = vfmaq_f32(vsqrtx4567, vhalfrsqrtx4567, vadjustment4567); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x36()
|
D | neonfma-nr1rsqrts1fma1adj-x20.c | 62 float32x4_t vsqrtx4567 = vmulq_f32(vrsqrtx4567, vx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x20() local 72 const float32x4_t vresidual4567 = vfmsq_f32(vhalf, vsqrtx4567, vhalfrsqrtx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x20() 80 vsqrtx4567 = vfmaq_f32(vsqrtx4567, vresidual4567, vsqrtx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x20() 89 const float32x4_t vadjustment4567 = vfmsq_f32(vx4567, vsqrtx4567, vsqrtx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x20() 95 const float32x4_t vy4567 = vfmaq_f32(vsqrtx4567, vhalfrsqrtx4567, vadjustment4567); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x20()
|
D | neonfma-nr2fma1adj-x40.c | 54 float32x4_t vsqrtx4567 = vmulq_f32(vrsqrtx4567, vx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x40() local 74 float32x4_t vresidual4567 = vfmsq_f32(vhalf, vsqrtx4567, vhalfrsqrtx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x40() 87 vsqrtx4567 = vfmaq_f32(vsqrtx4567, vresidual4567, vsqrtx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x40() 106 vresidual4567 = vfmsq_f32(vhalf, vsqrtx4567, vhalfrsqrtx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x40() 119 vsqrtx4567 = vfmaq_f32(vsqrtx4567, vresidual4567, vsqrtx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x40() 138 const float32x4_t vadjustment4567 = vfmsq_f32(vx4567, vsqrtx4567, vsqrtx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x40() 149 const float32x4_t vy4567 = vfmaq_f32(vsqrtx4567, vhalfrsqrtx4567, vadjustment4567); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x40()
|
D | neonfma-nr1rsqrts1fma1adj-x24.c | 67 float32x4_t vsqrtx4567 = vmulq_f32(vrsqrtx4567, vx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x24() local 79 const float32x4_t vresidual4567 = vfmsq_f32(vhalf, vsqrtx4567, vhalfrsqrtx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x24() 88 vsqrtx4567 = vfmaq_f32(vsqrtx4567, vresidual4567, vsqrtx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x24() 99 const float32x4_t vadjustment4567 = vfmsq_f32(vx4567, vsqrtx4567, vsqrtx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x24() 106 const float32x4_t vy4567 = vfmaq_f32(vsqrtx4567, vhalfrsqrtx4567, vadjustment4567); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x24()
|
D | neonfma-nr1rsqrts1fma1adj-x28.c | 72 float32x4_t vsqrtx4567 = vmulq_f32(vrsqrtx4567, vx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x28() local 86 const float32x4_t vresidual4567 = vfmsq_f32(vhalf, vsqrtx4567, vhalfrsqrtx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x28() 96 vsqrtx4567 = vfmaq_f32(vsqrtx4567, vresidual4567, vsqrtx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x28() 109 const float32x4_t vadjustment4567 = vfmsq_f32(vx4567, vsqrtx4567, vsqrtx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x28() 117 const float32x4_t vy4567 = vfmaq_f32(vsqrtx4567, vhalfrsqrtx4567, vadjustment4567); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x28()
|
D | neonfma-nr1rsqrts1fma1adj-x32.c | 77 float32x4_t vsqrtx4567 = vmulq_f32(vrsqrtx4567, vx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x32() local 93 const float32x4_t vresidual4567 = vfmsq_f32(vhalf, vsqrtx4567, vhalfrsqrtx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x32() 104 vsqrtx4567 = vfmaq_f32(vsqrtx4567, vresidual4567, vsqrtx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x32() 119 const float32x4_t vadjustment4567 = vfmsq_f32(vx4567, vsqrtx4567, vsqrtx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x32() 128 const float32x4_t vy4567 = vfmaq_f32(vsqrtx4567, vhalfrsqrtx4567, vadjustment4567); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x32()
|
D | neonfma-nr1rsqrts1fma1adj-x36.c | 82 float32x4_t vsqrtx4567 = vmulq_f32(vrsqrtx4567, vx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x36() local 100 const float32x4_t vresidual4567 = vfmsq_f32(vhalf, vsqrtx4567, vhalfrsqrtx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x36() 112 vsqrtx4567 = vfmaq_f32(vsqrtx4567, vresidual4567, vsqrtx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x36() 129 const float32x4_t vadjustment4567 = vfmsq_f32(vx4567, vsqrtx4567, vsqrtx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x36() 139 const float32x4_t vy4567 = vfmaq_f32(vsqrtx4567, vhalfrsqrtx4567, vadjustment4567); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x36()
|
D | neonfma-nr1rsqrts1fma1adj-x40.c | 87 float32x4_t vsqrtx4567 = vmulq_f32(vrsqrtx4567, vx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x40() local 107 const float32x4_t vresidual4567 = vfmsq_f32(vhalf, vsqrtx4567, vhalfrsqrtx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x40() 120 vsqrtx4567 = vfmaq_f32(vsqrtx4567, vresidual4567, vsqrtx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x40() 139 const float32x4_t vadjustment4567 = vfmsq_f32(vx4567, vsqrtx4567, vsqrtx4567); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x40() 150 const float32x4_t vy4567 = vfmaq_f32(vsqrtx4567, vhalfrsqrtx4567, vadjustment4567); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x40()
|