/external/XNNPACK/src/f32-vsqrt/gen/ |
D | neonfma-nr2fma1adj-x16.c | 46 float32x4_t vsqrtxCDEF = vmulq_f32(vrsqrtxCDEF, vxCDEF); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x16() local 52 float32x4_t vresidualCDEF = vfmsq_f32(vhalf, vsqrtxCDEF, vhalfrsqrtxCDEF); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x16() 61 vsqrtxCDEF = vfmaq_f32(vsqrtxCDEF, vresidualCDEF, vsqrtxCDEF); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x16() 66 vresidualCDEF = vfmsq_f32(vhalf, vsqrtxCDEF, vhalfrsqrtxCDEF); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x16() 75 vsqrtxCDEF = vfmaq_f32(vsqrtxCDEF, vresidualCDEF, vsqrtxCDEF); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x16() 80 const float32x4_t vadjustmentCDEF = vfmsq_f32(vxCDEF, vsqrtxCDEF, vsqrtxCDEF); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x16() 85 const float32x4_t vyCDEF = vfmaq_f32(vsqrtxCDEF, vhalfrsqrtxCDEF, vadjustmentCDEF); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x16()
|
D | neonfma-nr2fma1adj-x20.c | 48 float32x4_t vsqrtxCDEF = vmulq_f32(vrsqrtxCDEF, vxCDEF); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x20() local 56 float32x4_t vresidualCDEF = vfmsq_f32(vhalf, vsqrtxCDEF, vhalfrsqrtxCDEF); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x20() 66 vsqrtxCDEF = vfmaq_f32(vsqrtxCDEF, vresidualCDEF, vsqrtxCDEF); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x20() 73 vresidualCDEF = vfmsq_f32(vhalf, vsqrtxCDEF, vhalfrsqrtxCDEF); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x20() 83 vsqrtxCDEF = vfmaq_f32(vsqrtxCDEF, vresidualCDEF, vsqrtxCDEF); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x20() 90 const float32x4_t vadjustmentCDEF = vfmsq_f32(vxCDEF, vsqrtxCDEF, vsqrtxCDEF); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x20() 96 const float32x4_t vyCDEF = vfmaq_f32(vsqrtxCDEF, vhalfrsqrtxCDEF, vadjustmentCDEF); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x20()
|
D | neonfma-nr2fma1adj-x24.c | 50 float32x4_t vsqrtxCDEF = vmulq_f32(vrsqrtxCDEF, vxCDEF); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x24() local 60 float32x4_t vresidualCDEF = vfmsq_f32(vhalf, vsqrtxCDEF, vhalfrsqrtxCDEF); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x24() 71 vsqrtxCDEF = vfmaq_f32(vsqrtxCDEF, vresidualCDEF, vsqrtxCDEF); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x24() 80 vresidualCDEF = vfmsq_f32(vhalf, vsqrtxCDEF, vhalfrsqrtxCDEF); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x24() 91 vsqrtxCDEF = vfmaq_f32(vsqrtxCDEF, vresidualCDEF, vsqrtxCDEF); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x24() 100 const float32x4_t vadjustmentCDEF = vfmsq_f32(vxCDEF, vsqrtxCDEF, vsqrtxCDEF); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x24() 107 const float32x4_t vyCDEF = vfmaq_f32(vsqrtxCDEF, vhalfrsqrtxCDEF, vadjustmentCDEF); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x24()
|
D | neonfma-nr2fma1adj-x28.c | 52 float32x4_t vsqrtxCDEF = vmulq_f32(vrsqrtxCDEF, vxCDEF); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x28() local 64 float32x4_t vresidualCDEF = vfmsq_f32(vhalf, vsqrtxCDEF, vhalfrsqrtxCDEF); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x28() 76 vsqrtxCDEF = vfmaq_f32(vsqrtxCDEF, vresidualCDEF, vsqrtxCDEF); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x28() 87 vresidualCDEF = vfmsq_f32(vhalf, vsqrtxCDEF, vhalfrsqrtxCDEF); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x28() 99 vsqrtxCDEF = vfmaq_f32(vsqrtxCDEF, vresidualCDEF, vsqrtxCDEF); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x28() 110 const float32x4_t vadjustmentCDEF = vfmsq_f32(vxCDEF, vsqrtxCDEF, vsqrtxCDEF); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x28() 118 const float32x4_t vyCDEF = vfmaq_f32(vsqrtxCDEF, vhalfrsqrtxCDEF, vadjustmentCDEF); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x28()
|
D | neonfma-nr1rsqrts1fma1adj-x16.c | 61 float32x4_t vsqrtxCDEF = vmulq_f32(vrsqrtxCDEF, vxCDEF); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x16() local 67 const float32x4_t vresidualCDEF = vfmsq_f32(vhalf, vsqrtxCDEF, vhalfrsqrtxCDEF); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x16() 76 vsqrtxCDEF = vfmaq_f32(vsqrtxCDEF, vresidualCDEF, vsqrtxCDEF); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x16() 81 const float32x4_t vadjustmentCDEF = vfmsq_f32(vxCDEF, vsqrtxCDEF, vsqrtxCDEF); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x16() 86 const float32x4_t vyCDEF = vfmaq_f32(vsqrtxCDEF, vhalfrsqrtxCDEF, vadjustmentCDEF); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x16()
|
D | neonfma-nr2fma1adj-x32.c | 54 float32x4_t vsqrtxCDEF = vmulq_f32(vrsqrtxCDEF, vxCDEF); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x32() local 68 float32x4_t vresidualCDEF = vfmsq_f32(vhalf, vsqrtxCDEF, vhalfrsqrtxCDEF); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x32() 81 vsqrtxCDEF = vfmaq_f32(vsqrtxCDEF, vresidualCDEF, vsqrtxCDEF); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x32() 94 vresidualCDEF = vfmsq_f32(vhalf, vsqrtxCDEF, vhalfrsqrtxCDEF); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x32() 107 vsqrtxCDEF = vfmaq_f32(vsqrtxCDEF, vresidualCDEF, vsqrtxCDEF); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x32() 120 const float32x4_t vadjustmentCDEF = vfmsq_f32(vxCDEF, vsqrtxCDEF, vsqrtxCDEF); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x32() 129 const float32x4_t vyCDEF = vfmaq_f32(vsqrtxCDEF, vhalfrsqrtxCDEF, vadjustmentCDEF); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x32()
|
D | neonfma-nr2fma1adj-x36.c | 56 float32x4_t vsqrtxCDEF = vmulq_f32(vrsqrtxCDEF, vxCDEF); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x36() local 72 float32x4_t vresidualCDEF = vfmsq_f32(vhalf, vsqrtxCDEF, vhalfrsqrtxCDEF); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x36() 86 vsqrtxCDEF = vfmaq_f32(vsqrtxCDEF, vresidualCDEF, vsqrtxCDEF); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x36() 101 vresidualCDEF = vfmsq_f32(vhalf, vsqrtxCDEF, vhalfrsqrtxCDEF); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x36() 115 vsqrtxCDEF = vfmaq_f32(vsqrtxCDEF, vresidualCDEF, vsqrtxCDEF); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x36() 130 const float32x4_t vadjustmentCDEF = vfmsq_f32(vxCDEF, vsqrtxCDEF, vsqrtxCDEF); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x36() 140 const float32x4_t vyCDEF = vfmaq_f32(vsqrtxCDEF, vhalfrsqrtxCDEF, vadjustmentCDEF); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x36()
|
D | neonfma-nr1rsqrts1fma1adj-x20.c | 66 float32x4_t vsqrtxCDEF = vmulq_f32(vrsqrtxCDEF, vxCDEF); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x20() local 74 const float32x4_t vresidualCDEF = vfmsq_f32(vhalf, vsqrtxCDEF, vhalfrsqrtxCDEF); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x20() 84 vsqrtxCDEF = vfmaq_f32(vsqrtxCDEF, vresidualCDEF, vsqrtxCDEF); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x20() 91 const float32x4_t vadjustmentCDEF = vfmsq_f32(vxCDEF, vsqrtxCDEF, vsqrtxCDEF); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x20() 97 const float32x4_t vyCDEF = vfmaq_f32(vsqrtxCDEF, vhalfrsqrtxCDEF, vadjustmentCDEF); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x20()
|
D | neonfma-nr2fma1adj-x40.c | 58 float32x4_t vsqrtxCDEF = vmulq_f32(vrsqrtxCDEF, vxCDEF); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x40() local 76 float32x4_t vresidualCDEF = vfmsq_f32(vhalf, vsqrtxCDEF, vhalfrsqrtxCDEF); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x40() 91 vsqrtxCDEF = vfmaq_f32(vsqrtxCDEF, vresidualCDEF, vsqrtxCDEF); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x40() 108 vresidualCDEF = vfmsq_f32(vhalf, vsqrtxCDEF, vhalfrsqrtxCDEF); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x40() 123 vsqrtxCDEF = vfmaq_f32(vsqrtxCDEF, vresidualCDEF, vsqrtxCDEF); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x40() 140 const float32x4_t vadjustmentCDEF = vfmsq_f32(vxCDEF, vsqrtxCDEF, vsqrtxCDEF); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x40() 151 const float32x4_t vyCDEF = vfmaq_f32(vsqrtxCDEF, vhalfrsqrtxCDEF, vadjustmentCDEF); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x40()
|
D | neonfma-nr1rsqrts1fma1adj-x24.c | 71 float32x4_t vsqrtxCDEF = vmulq_f32(vrsqrtxCDEF, vxCDEF); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x24() local 81 const float32x4_t vresidualCDEF = vfmsq_f32(vhalf, vsqrtxCDEF, vhalfrsqrtxCDEF); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x24() 92 vsqrtxCDEF = vfmaq_f32(vsqrtxCDEF, vresidualCDEF, vsqrtxCDEF); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x24() 101 const float32x4_t vadjustmentCDEF = vfmsq_f32(vxCDEF, vsqrtxCDEF, vsqrtxCDEF); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x24() 108 const float32x4_t vyCDEF = vfmaq_f32(vsqrtxCDEF, vhalfrsqrtxCDEF, vadjustmentCDEF); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x24()
|
D | neonfma-nr1rsqrts1fma1adj-x28.c | 76 float32x4_t vsqrtxCDEF = vmulq_f32(vrsqrtxCDEF, vxCDEF); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x28() local 88 const float32x4_t vresidualCDEF = vfmsq_f32(vhalf, vsqrtxCDEF, vhalfrsqrtxCDEF); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x28() 100 vsqrtxCDEF = vfmaq_f32(vsqrtxCDEF, vresidualCDEF, vsqrtxCDEF); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x28() 111 const float32x4_t vadjustmentCDEF = vfmsq_f32(vxCDEF, vsqrtxCDEF, vsqrtxCDEF); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x28() 119 const float32x4_t vyCDEF = vfmaq_f32(vsqrtxCDEF, vhalfrsqrtxCDEF, vadjustmentCDEF); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x28()
|
D | neonfma-nr1rsqrts1fma1adj-x32.c | 81 float32x4_t vsqrtxCDEF = vmulq_f32(vrsqrtxCDEF, vxCDEF); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x32() local 95 const float32x4_t vresidualCDEF = vfmsq_f32(vhalf, vsqrtxCDEF, vhalfrsqrtxCDEF); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x32() 108 vsqrtxCDEF = vfmaq_f32(vsqrtxCDEF, vresidualCDEF, vsqrtxCDEF); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x32() 121 const float32x4_t vadjustmentCDEF = vfmsq_f32(vxCDEF, vsqrtxCDEF, vsqrtxCDEF); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x32() 130 const float32x4_t vyCDEF = vfmaq_f32(vsqrtxCDEF, vhalfrsqrtxCDEF, vadjustmentCDEF); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x32()
|
D | neonfma-nr1rsqrts1fma1adj-x36.c | 86 float32x4_t vsqrtxCDEF = vmulq_f32(vrsqrtxCDEF, vxCDEF); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x36() local 102 const float32x4_t vresidualCDEF = vfmsq_f32(vhalf, vsqrtxCDEF, vhalfrsqrtxCDEF); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x36() 116 vsqrtxCDEF = vfmaq_f32(vsqrtxCDEF, vresidualCDEF, vsqrtxCDEF); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x36() 131 const float32x4_t vadjustmentCDEF = vfmsq_f32(vxCDEF, vsqrtxCDEF, vsqrtxCDEF); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x36() 141 const float32x4_t vyCDEF = vfmaq_f32(vsqrtxCDEF, vhalfrsqrtxCDEF, vadjustmentCDEF); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x36()
|
D | neonfma-nr1rsqrts1fma1adj-x40.c | 91 float32x4_t vsqrtxCDEF = vmulq_f32(vrsqrtxCDEF, vxCDEF); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x40() local 109 const float32x4_t vresidualCDEF = vfmsq_f32(vhalf, vsqrtxCDEF, vhalfrsqrtxCDEF); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x40() 124 vsqrtxCDEF = vfmaq_f32(vsqrtxCDEF, vresidualCDEF, vsqrtxCDEF); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x40() 141 const float32x4_t vadjustmentCDEF = vfmsq_f32(vxCDEF, vsqrtxCDEF, vsqrtxCDEF); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x40() 152 const float32x4_t vyCDEF = vfmaq_f32(vsqrtxCDEF, vhalfrsqrtxCDEF, vadjustmentCDEF); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x40()
|