/external/XNNPACK/src/f32-vsqrt/gen/
D | neonfma-nr2fma1adj-x12.c | in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x12()
      42  float32x4_t vsqrtx89AB = vmulq_f32(vrsqrtx89AB, vx89AB);   (local)
      47  float32x4_t vresidual89AB = vfmsq_f32(vhalf, vsqrtx89AB, vhalfrsqrtx89AB);
      54  vsqrtx89AB = vfmaq_f32(vsqrtx89AB, vresidual89AB, vsqrtx89AB);
      58  vresidual89AB = vfmsq_f32(vhalf, vsqrtx89AB, vhalfrsqrtx89AB);
      65  vsqrtx89AB = vfmaq_f32(vsqrtx89AB, vresidual89AB, vsqrtx89AB);
      69  const float32x4_t vadjustment89AB = vfmsq_f32(vx89AB, vsqrtx89AB, vsqrtx89AB);
      73  const float32x4_t vy89AB = vfmaq_f32(vsqrtx89AB, vhalfrsqrtx89AB, vadjustment89AB);

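All of the nr2fma1adj entries in this listing expand the same kernel template, unrolled over 3 to 10 vectors of four floats; the vsqrtx89AB lanes shown are just the third vector of each unroll. Below is a minimal single-vector sketch of the arithmetic, reconstructed from the expressions that appear in these listings. The function name, the load/store scaffolding, and the vhalfrsqrtx update are assumptions about the surrounding template (the search only matches lines containing vsqrtx89AB), not lines taken from it.

#include <arm_neon.h>

// Single-vector sketch of the nr2fma1adj square-root scheme (assumed
// scaffolding; only the v* expressions mirror the listing above).
static void vsqrt_nr2fma1adj_x4(const float* x, float* y) {
  const float32x4_t vhalf = vdupq_n_f32(0.5f);

  const float32x4_t vx = vld1q_f32(x);
  const float32x4_t vrsqrtx = vrsqrteq_f32(vx);         // r ~ 1/sqrt(x), coarse estimate
  float32x4_t vsqrtx = vmulq_f32(vrsqrtx, vx);          // s = r*x ~ sqrt(x)
  float32x4_t vhalfrsqrtx = vmulq_f32(vrsqrtx, vhalf);  // h = r/2 ~ 1/(2*sqrt(x))

  // First coupled Newton-Raphson step: e = 0.5 - s*h, then h += h*e, s += s*e.
  float32x4_t vresidual = vfmsq_f32(vhalf, vsqrtx, vhalfrsqrtx);
  vhalfrsqrtx = vfmaq_f32(vhalfrsqrtx, vresidual, vhalfrsqrtx);
  vsqrtx = vfmaq_f32(vsqrtx, vresidual, vsqrtx);

  // Second coupled Newton-Raphson step.
  vresidual = vfmsq_f32(vhalf, vsqrtx, vhalfrsqrtx);
  vhalfrsqrtx = vfmaq_f32(vhalfrsqrtx, vresidual, vhalfrsqrtx);
  vsqrtx = vfmaq_f32(vsqrtx, vresidual, vsqrtx);

  // Final adjustment: adj = x - s*s, y = s + h*adj.
  const float32x4_t vadjustment = vfmsq_f32(vx, vsqrtx, vsqrtx);
  const float32x4_t vy = vfmaq_f32(vsqrtx, vhalfrsqrtx, vadjustment);

  vst1q_f32(y, vy);
}

The coupled iteration refines sqrt(x) and 1/(2*sqrt(x)) together, so the final adjustment costs only two extra FMAs; the nr1rsqrts1fma1adj entries further down trade one of the FMA iterations for a cheaper VRSQRTS step.
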
D | neonfma-nr2fma1adj-x16.c | in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x16()
      44  float32x4_t vsqrtx89AB = vmulq_f32(vrsqrtx89AB, vx89AB);   (local)
      51  float32x4_t vresidual89AB = vfmsq_f32(vhalf, vsqrtx89AB, vhalfrsqrtx89AB);
      59  vsqrtx89AB = vfmaq_f32(vsqrtx89AB, vresidual89AB, vsqrtx89AB);
      65  vresidual89AB = vfmsq_f32(vhalf, vsqrtx89AB, vhalfrsqrtx89AB);
      73  vsqrtx89AB = vfmaq_f32(vsqrtx89AB, vresidual89AB, vsqrtx89AB);
      79  const float32x4_t vadjustment89AB = vfmsq_f32(vx89AB, vsqrtx89AB, vsqrtx89AB);
      84  const float32x4_t vy89AB = vfmaq_f32(vsqrtx89AB, vhalfrsqrtx89AB, vadjustment89AB);

D | neonfma-nr2fma1adj-x20.c | in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x20()
      46  float32x4_t vsqrtx89AB = vmulq_f32(vrsqrtx89AB, vx89AB);   (local)
      55  float32x4_t vresidual89AB = vfmsq_f32(vhalf, vsqrtx89AB, vhalfrsqrtx89AB);
      64  vsqrtx89AB = vfmaq_f32(vsqrtx89AB, vresidual89AB, vsqrtx89AB);
      72  vresidual89AB = vfmsq_f32(vhalf, vsqrtx89AB, vhalfrsqrtx89AB);
      81  vsqrtx89AB = vfmaq_f32(vsqrtx89AB, vresidual89AB, vsqrtx89AB);
      89  const float32x4_t vadjustment89AB = vfmsq_f32(vx89AB, vsqrtx89AB, vsqrtx89AB);
      95  const float32x4_t vy89AB = vfmaq_f32(vsqrtx89AB, vhalfrsqrtx89AB, vadjustment89AB);

D | neonfma-nr2fma1adj-x24.c | in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x24()
      48  float32x4_t vsqrtx89AB = vmulq_f32(vrsqrtx89AB, vx89AB);   (local)
      59  float32x4_t vresidual89AB = vfmsq_f32(vhalf, vsqrtx89AB, vhalfrsqrtx89AB);
      69  vsqrtx89AB = vfmaq_f32(vsqrtx89AB, vresidual89AB, vsqrtx89AB);
      79  vresidual89AB = vfmsq_f32(vhalf, vsqrtx89AB, vhalfrsqrtx89AB);
      89  vsqrtx89AB = vfmaq_f32(vsqrtx89AB, vresidual89AB, vsqrtx89AB);
      99  const float32x4_t vadjustment89AB = vfmsq_f32(vx89AB, vsqrtx89AB, vsqrtx89AB);
     106  const float32x4_t vy89AB = vfmaq_f32(vsqrtx89AB, vhalfrsqrtx89AB, vadjustment89AB);

D | neonfma-nr1rsqrts1fma1adj-x12.c | in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x12()
      54  float32x4_t vsqrtx89AB = vmulq_f32(vrsqrtx89AB, vx89AB);   (local)
      59  const float32x4_t vresidual89AB = vfmsq_f32(vhalf, vsqrtx89AB, vhalfrsqrtx89AB);
      66  vsqrtx89AB = vfmaq_f32(vsqrtx89AB, vresidual89AB, vsqrtx89AB);
      70  const float32x4_t vadjustment89AB = vfmsq_f32(vx89AB, vsqrtx89AB, vsqrtx89AB);
      74  const float32x4_t vy89AB = vfmaq_f32(vsqrtx89AB, vhalfrsqrtx89AB, vadjustment89AB);

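The nr1rsqrts1fma1adj entries differ only in how the reciprocal square-root estimate is refined: one VRSQRTS Newton-Raphson step on the estimate itself, then a single coupled FMA step and the final adjustment. A single-vector sketch under that assumption follows; the VRSQRTS refinement and its temporary (vcorrection here) are illustrative reconstructions, since those lines do not contain vsqrtx89AB and therefore are not shown in the listing.

#include <arm_neon.h>

// Single-vector sketch of the nr1rsqrts1fma1adj scheme (assumed scaffolding;
// the VRSQRTS refinement below is reconstructed, not taken from the listing).
static void vsqrt_nr1rsqrts1fma1adj_x4(const float* x, float* y) {
  const float32x4_t vhalf = vdupq_n_f32(0.5f);

  const float32x4_t vx = vld1q_f32(x);
  float32x4_t vrsqrtx = vrsqrteq_f32(vx);               // r ~ 1/sqrt(x), coarse estimate

  // One VRSQRTS Newton-Raphson step on the estimate: r *= (3 - (x*r)*r) / 2.
  const float32x4_t vcorrection = vrsqrtsq_f32(vmulq_f32(vx, vrsqrtx), vrsqrtx);
  vrsqrtx = vmulq_f32(vrsqrtx, vcorrection);

  float32x4_t vsqrtx = vmulq_f32(vrsqrtx, vx);          // s = r*x ~ sqrt(x)
  float32x4_t vhalfrsqrtx = vmulq_f32(vrsqrtx, vhalf);  // h = r/2 ~ 1/(2*sqrt(x))

  // Single coupled FMA Newton-Raphson step: e = 0.5 - s*h, then h += h*e, s += s*e.
  const float32x4_t vresidual = vfmsq_f32(vhalf, vsqrtx, vhalfrsqrtx);
  vhalfrsqrtx = vfmaq_f32(vhalfrsqrtx, vresidual, vhalfrsqrtx);
  vsqrtx = vfmaq_f32(vsqrtx, vresidual, vsqrtx);

  // Final adjustment: adj = x - s*s, y = s + h*adj.
  const float32x4_t vadjustment = vfmsq_f32(vx, vsqrtx, vsqrtx);
  const float32x4_t vy = vfmaq_f32(vsqrtx, vhalfrsqrtx, vadjustment);

  vst1q_f32(y, vy);
}

Everything from the vsqrtx/vhalfrsqrtx products onward matches the expressions listed above, with vresidual now const because only one FMA iteration is performed.
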
D | neonfma-nr2fma1adj-x28.c | in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x28()
      50  float32x4_t vsqrtx89AB = vmulq_f32(vrsqrtx89AB, vx89AB);   (local)
      63  float32x4_t vresidual89AB = vfmsq_f32(vhalf, vsqrtx89AB, vhalfrsqrtx89AB);
      74  vsqrtx89AB = vfmaq_f32(vsqrtx89AB, vresidual89AB, vsqrtx89AB);
      86  vresidual89AB = vfmsq_f32(vhalf, vsqrtx89AB, vhalfrsqrtx89AB);
      97  vsqrtx89AB = vfmaq_f32(vsqrtx89AB, vresidual89AB, vsqrtx89AB);
     109  const float32x4_t vadjustment89AB = vfmsq_f32(vx89AB, vsqrtx89AB, vsqrtx89AB);
     117  const float32x4_t vy89AB = vfmaq_f32(vsqrtx89AB, vhalfrsqrtx89AB, vadjustment89AB);

D | neonfma-nr1rsqrts1fma1adj-x16.c | in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x16()
      59  float32x4_t vsqrtx89AB = vmulq_f32(vrsqrtx89AB, vx89AB);   (local)
      66  const float32x4_t vresidual89AB = vfmsq_f32(vhalf, vsqrtx89AB, vhalfrsqrtx89AB);
      74  vsqrtx89AB = vfmaq_f32(vsqrtx89AB, vresidual89AB, vsqrtx89AB);
      80  const float32x4_t vadjustment89AB = vfmsq_f32(vx89AB, vsqrtx89AB, vsqrtx89AB);
      85  const float32x4_t vy89AB = vfmaq_f32(vsqrtx89AB, vhalfrsqrtx89AB, vadjustment89AB);

D | neonfma-nr2fma1adj-x32.c | in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x32()
      52  float32x4_t vsqrtx89AB = vmulq_f32(vrsqrtx89AB, vx89AB);   (local)
      67  float32x4_t vresidual89AB = vfmsq_f32(vhalf, vsqrtx89AB, vhalfrsqrtx89AB);
      79  vsqrtx89AB = vfmaq_f32(vsqrtx89AB, vresidual89AB, vsqrtx89AB);
      93  vresidual89AB = vfmsq_f32(vhalf, vsqrtx89AB, vhalfrsqrtx89AB);
     105  vsqrtx89AB = vfmaq_f32(vsqrtx89AB, vresidual89AB, vsqrtx89AB);
     119  const float32x4_t vadjustment89AB = vfmsq_f32(vx89AB, vsqrtx89AB, vsqrtx89AB);
     128  const float32x4_t vy89AB = vfmaq_f32(vsqrtx89AB, vhalfrsqrtx89AB, vadjustment89AB);

D | neonfma-nr2fma1adj-x36.c | in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x36()
      54  float32x4_t vsqrtx89AB = vmulq_f32(vrsqrtx89AB, vx89AB);   (local)
      71  float32x4_t vresidual89AB = vfmsq_f32(vhalf, vsqrtx89AB, vhalfrsqrtx89AB);
      84  vsqrtx89AB = vfmaq_f32(vsqrtx89AB, vresidual89AB, vsqrtx89AB);
     100  vresidual89AB = vfmsq_f32(vhalf, vsqrtx89AB, vhalfrsqrtx89AB);
     113  vsqrtx89AB = vfmaq_f32(vsqrtx89AB, vresidual89AB, vsqrtx89AB);
     129  const float32x4_t vadjustment89AB = vfmsq_f32(vx89AB, vsqrtx89AB, vsqrtx89AB);
     139  const float32x4_t vy89AB = vfmaq_f32(vsqrtx89AB, vhalfrsqrtx89AB, vadjustment89AB);

D | neonfma-nr1rsqrts1fma1adj-x20.c | in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x20()
      64  float32x4_t vsqrtx89AB = vmulq_f32(vrsqrtx89AB, vx89AB);   (local)
      73  const float32x4_t vresidual89AB = vfmsq_f32(vhalf, vsqrtx89AB, vhalfrsqrtx89AB);
      82  vsqrtx89AB = vfmaq_f32(vsqrtx89AB, vresidual89AB, vsqrtx89AB);
      90  const float32x4_t vadjustment89AB = vfmsq_f32(vx89AB, vsqrtx89AB, vsqrtx89AB);
      96  const float32x4_t vy89AB = vfmaq_f32(vsqrtx89AB, vhalfrsqrtx89AB, vadjustment89AB);

D | neonfma-nr2fma1adj-x40.c | in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x40()
      56  float32x4_t vsqrtx89AB = vmulq_f32(vrsqrtx89AB, vx89AB);   (local)
      75  float32x4_t vresidual89AB = vfmsq_f32(vhalf, vsqrtx89AB, vhalfrsqrtx89AB);
      89  vsqrtx89AB = vfmaq_f32(vsqrtx89AB, vresidual89AB, vsqrtx89AB);
     107  vresidual89AB = vfmsq_f32(vhalf, vsqrtx89AB, vhalfrsqrtx89AB);
     121  vsqrtx89AB = vfmaq_f32(vsqrtx89AB, vresidual89AB, vsqrtx89AB);
     139  const float32x4_t vadjustment89AB = vfmsq_f32(vx89AB, vsqrtx89AB, vsqrtx89AB);
     150  const float32x4_t vy89AB = vfmaq_f32(vsqrtx89AB, vhalfrsqrtx89AB, vadjustment89AB);

D | neonfma-nr1rsqrts1fma1adj-x24.c | in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x24()
      69  float32x4_t vsqrtx89AB = vmulq_f32(vrsqrtx89AB, vx89AB);   (local)
      80  const float32x4_t vresidual89AB = vfmsq_f32(vhalf, vsqrtx89AB, vhalfrsqrtx89AB);
      90  vsqrtx89AB = vfmaq_f32(vsqrtx89AB, vresidual89AB, vsqrtx89AB);
     100  const float32x4_t vadjustment89AB = vfmsq_f32(vx89AB, vsqrtx89AB, vsqrtx89AB);
     107  const float32x4_t vy89AB = vfmaq_f32(vsqrtx89AB, vhalfrsqrtx89AB, vadjustment89AB);

D | neonfma-nr1rsqrts1fma1adj-x28.c | in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x28()
      74  float32x4_t vsqrtx89AB = vmulq_f32(vrsqrtx89AB, vx89AB);   (local)
      87  const float32x4_t vresidual89AB = vfmsq_f32(vhalf, vsqrtx89AB, vhalfrsqrtx89AB);
      98  vsqrtx89AB = vfmaq_f32(vsqrtx89AB, vresidual89AB, vsqrtx89AB);
     110  const float32x4_t vadjustment89AB = vfmsq_f32(vx89AB, vsqrtx89AB, vsqrtx89AB);
     118  const float32x4_t vy89AB = vfmaq_f32(vsqrtx89AB, vhalfrsqrtx89AB, vadjustment89AB);

D | neonfma-nr1rsqrts1fma1adj-x32.c | in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x32()
      79  float32x4_t vsqrtx89AB = vmulq_f32(vrsqrtx89AB, vx89AB);   (local)
      94  const float32x4_t vresidual89AB = vfmsq_f32(vhalf, vsqrtx89AB, vhalfrsqrtx89AB);
     106  vsqrtx89AB = vfmaq_f32(vsqrtx89AB, vresidual89AB, vsqrtx89AB);
     120  const float32x4_t vadjustment89AB = vfmsq_f32(vx89AB, vsqrtx89AB, vsqrtx89AB);
     129  const float32x4_t vy89AB = vfmaq_f32(vsqrtx89AB, vhalfrsqrtx89AB, vadjustment89AB);

D | neonfma-nr1rsqrts1fma1adj-x36.c | in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x36()
      84  float32x4_t vsqrtx89AB = vmulq_f32(vrsqrtx89AB, vx89AB);   (local)
     101  const float32x4_t vresidual89AB = vfmsq_f32(vhalf, vsqrtx89AB, vhalfrsqrtx89AB);
     114  vsqrtx89AB = vfmaq_f32(vsqrtx89AB, vresidual89AB, vsqrtx89AB);
     130  const float32x4_t vadjustment89AB = vfmsq_f32(vx89AB, vsqrtx89AB, vsqrtx89AB);
     140  const float32x4_t vy89AB = vfmaq_f32(vsqrtx89AB, vhalfrsqrtx89AB, vadjustment89AB);

D | neonfma-nr1rsqrts1fma1adj-x40.c | in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x40()
      89  float32x4_t vsqrtx89AB = vmulq_f32(vrsqrtx89AB, vx89AB);   (local)
     108  const float32x4_t vresidual89AB = vfmsq_f32(vhalf, vsqrtx89AB, vhalfrsqrtx89AB);
     122  vsqrtx89AB = vfmaq_f32(vsqrtx89AB, vresidual89AB, vsqrtx89AB);
     140  const float32x4_t vadjustment89AB = vfmsq_f32(vx89AB, vsqrtx89AB, vsqrtx89AB);
     151  const float32x4_t vy89AB = vfmaq_f32(vsqrtx89AB, vhalfrsqrtx89AB, vadjustment89AB);