/external/XNNPACK/src/f32-vsqrt/gen/ |
D | neonfma-nr2fma1adj-x20.c | 50 float32x4_t vsqrtxGHIJ = vmulq_f32(vrsqrtxGHIJ, vxGHIJ); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x20() local 57 float32x4_t vresidualGHIJ = vfmsq_f32(vhalf, vsqrtxGHIJ, vhalfrsqrtxGHIJ); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x20() 68 vsqrtxGHIJ = vfmaq_f32(vsqrtxGHIJ, vresidualGHIJ, vsqrtxGHIJ); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x20() 74 vresidualGHIJ = vfmsq_f32(vhalf, vsqrtxGHIJ, vhalfrsqrtxGHIJ); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x20() 85 vsqrtxGHIJ = vfmaq_f32(vsqrtxGHIJ, vresidualGHIJ, vsqrtxGHIJ); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x20() 91 const float32x4_t vadjustmentGHIJ = vfmsq_f32(vxGHIJ, vsqrtxGHIJ, vsqrtxGHIJ); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x20() 97 const float32x4_t vyGHIJ = vfmaq_f32(vsqrtxGHIJ, vhalfrsqrtxGHIJ, vadjustmentGHIJ); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x20()
|
D | neonfma-nr2fma1adj-x24.c | 52 float32x4_t vsqrtxGHIJ = vmulq_f32(vrsqrtxGHIJ, vxGHIJ); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x24() local 61 float32x4_t vresidualGHIJ = vfmsq_f32(vhalf, vsqrtxGHIJ, vhalfrsqrtxGHIJ); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x24() 73 vsqrtxGHIJ = vfmaq_f32(vsqrtxGHIJ, vresidualGHIJ, vsqrtxGHIJ); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x24() 81 vresidualGHIJ = vfmsq_f32(vhalf, vsqrtxGHIJ, vhalfrsqrtxGHIJ); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x24() 93 vsqrtxGHIJ = vfmaq_f32(vsqrtxGHIJ, vresidualGHIJ, vsqrtxGHIJ); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x24() 101 const float32x4_t vadjustmentGHIJ = vfmsq_f32(vxGHIJ, vsqrtxGHIJ, vsqrtxGHIJ); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x24() 108 const float32x4_t vyGHIJ = vfmaq_f32(vsqrtxGHIJ, vhalfrsqrtxGHIJ, vadjustmentGHIJ); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x24()
|
D | neonfma-nr2fma1adj-x28.c | 54 float32x4_t vsqrtxGHIJ = vmulq_f32(vrsqrtxGHIJ, vxGHIJ); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x28() local 65 float32x4_t vresidualGHIJ = vfmsq_f32(vhalf, vsqrtxGHIJ, vhalfrsqrtxGHIJ); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x28() 78 vsqrtxGHIJ = vfmaq_f32(vsqrtxGHIJ, vresidualGHIJ, vsqrtxGHIJ); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x28() 88 vresidualGHIJ = vfmsq_f32(vhalf, vsqrtxGHIJ, vhalfrsqrtxGHIJ); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x28() 101 vsqrtxGHIJ = vfmaq_f32(vsqrtxGHIJ, vresidualGHIJ, vsqrtxGHIJ); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x28() 111 const float32x4_t vadjustmentGHIJ = vfmsq_f32(vxGHIJ, vsqrtxGHIJ, vsqrtxGHIJ); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x28() 119 const float32x4_t vyGHIJ = vfmaq_f32(vsqrtxGHIJ, vhalfrsqrtxGHIJ, vadjustmentGHIJ); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x28()
|
D | neonfma-nr2fma1adj-x32.c | 56 float32x4_t vsqrtxGHIJ = vmulq_f32(vrsqrtxGHIJ, vxGHIJ); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x32() local 69 float32x4_t vresidualGHIJ = vfmsq_f32(vhalf, vsqrtxGHIJ, vhalfrsqrtxGHIJ); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x32() 83 vsqrtxGHIJ = vfmaq_f32(vsqrtxGHIJ, vresidualGHIJ, vsqrtxGHIJ); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x32() 95 vresidualGHIJ = vfmsq_f32(vhalf, vsqrtxGHIJ, vhalfrsqrtxGHIJ); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x32() 109 vsqrtxGHIJ = vfmaq_f32(vsqrtxGHIJ, vresidualGHIJ, vsqrtxGHIJ); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x32() 121 const float32x4_t vadjustmentGHIJ = vfmsq_f32(vxGHIJ, vsqrtxGHIJ, vsqrtxGHIJ); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x32() 130 const float32x4_t vyGHIJ = vfmaq_f32(vsqrtxGHIJ, vhalfrsqrtxGHIJ, vadjustmentGHIJ); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x32()
|
D | neonfma-nr2fma1adj-x36.c | 58 float32x4_t vsqrtxGHIJ = vmulq_f32(vrsqrtxGHIJ, vxGHIJ); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x36() local 73 float32x4_t vresidualGHIJ = vfmsq_f32(vhalf, vsqrtxGHIJ, vhalfrsqrtxGHIJ); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x36() 88 vsqrtxGHIJ = vfmaq_f32(vsqrtxGHIJ, vresidualGHIJ, vsqrtxGHIJ); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x36() 102 vresidualGHIJ = vfmsq_f32(vhalf, vsqrtxGHIJ, vhalfrsqrtxGHIJ); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x36() 117 vsqrtxGHIJ = vfmaq_f32(vsqrtxGHIJ, vresidualGHIJ, vsqrtxGHIJ); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x36() 131 const float32x4_t vadjustmentGHIJ = vfmsq_f32(vxGHIJ, vsqrtxGHIJ, vsqrtxGHIJ); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x36() 141 const float32x4_t vyGHIJ = vfmaq_f32(vsqrtxGHIJ, vhalfrsqrtxGHIJ, vadjustmentGHIJ); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x36()
|
D | neonfma-nr1rsqrts1fma1adj-x20.c | 68 float32x4_t vsqrtxGHIJ = vmulq_f32(vrsqrtxGHIJ, vxGHIJ); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x20() local 75 const float32x4_t vresidualGHIJ = vfmsq_f32(vhalf, vsqrtxGHIJ, vhalfrsqrtxGHIJ); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x20() 86 vsqrtxGHIJ = vfmaq_f32(vsqrtxGHIJ, vresidualGHIJ, vsqrtxGHIJ); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x20() 92 const float32x4_t vadjustmentGHIJ = vfmsq_f32(vxGHIJ, vsqrtxGHIJ, vsqrtxGHIJ); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x20() 98 const float32x4_t vyGHIJ = vfmaq_f32(vsqrtxGHIJ, vhalfrsqrtxGHIJ, vadjustmentGHIJ); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x20()
|
D | neonfma-nr2fma1adj-x40.c | 60 float32x4_t vsqrtxGHIJ = vmulq_f32(vrsqrtxGHIJ, vxGHIJ); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x40() local 77 float32x4_t vresidualGHIJ = vfmsq_f32(vhalf, vsqrtxGHIJ, vhalfrsqrtxGHIJ); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x40() 93 vsqrtxGHIJ = vfmaq_f32(vsqrtxGHIJ, vresidualGHIJ, vsqrtxGHIJ); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x40() 109 vresidualGHIJ = vfmsq_f32(vhalf, vsqrtxGHIJ, vhalfrsqrtxGHIJ); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x40() 125 vsqrtxGHIJ = vfmaq_f32(vsqrtxGHIJ, vresidualGHIJ, vsqrtxGHIJ); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x40() 141 const float32x4_t vadjustmentGHIJ = vfmsq_f32(vxGHIJ, vsqrtxGHIJ, vsqrtxGHIJ); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x40() 152 const float32x4_t vyGHIJ = vfmaq_f32(vsqrtxGHIJ, vhalfrsqrtxGHIJ, vadjustmentGHIJ); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x40()
|
D | neonfma-nr1rsqrts1fma1adj-x24.c | 73 float32x4_t vsqrtxGHIJ = vmulq_f32(vrsqrtxGHIJ, vxGHIJ); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x24() local 82 const float32x4_t vresidualGHIJ = vfmsq_f32(vhalf, vsqrtxGHIJ, vhalfrsqrtxGHIJ); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x24() 94 vsqrtxGHIJ = vfmaq_f32(vsqrtxGHIJ, vresidualGHIJ, vsqrtxGHIJ); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x24() 102 const float32x4_t vadjustmentGHIJ = vfmsq_f32(vxGHIJ, vsqrtxGHIJ, vsqrtxGHIJ); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x24() 109 const float32x4_t vyGHIJ = vfmaq_f32(vsqrtxGHIJ, vhalfrsqrtxGHIJ, vadjustmentGHIJ); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x24()
|
D | neonfma-nr1rsqrts1fma1adj-x28.c | 78 float32x4_t vsqrtxGHIJ = vmulq_f32(vrsqrtxGHIJ, vxGHIJ); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x28() local 89 const float32x4_t vresidualGHIJ = vfmsq_f32(vhalf, vsqrtxGHIJ, vhalfrsqrtxGHIJ); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x28() 102 vsqrtxGHIJ = vfmaq_f32(vsqrtxGHIJ, vresidualGHIJ, vsqrtxGHIJ); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x28() 112 const float32x4_t vadjustmentGHIJ = vfmsq_f32(vxGHIJ, vsqrtxGHIJ, vsqrtxGHIJ); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x28() 120 const float32x4_t vyGHIJ = vfmaq_f32(vsqrtxGHIJ, vhalfrsqrtxGHIJ, vadjustmentGHIJ); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x28()
|
D | neonfma-nr1rsqrts1fma1adj-x32.c | 83 float32x4_t vsqrtxGHIJ = vmulq_f32(vrsqrtxGHIJ, vxGHIJ); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x32() local 96 const float32x4_t vresidualGHIJ = vfmsq_f32(vhalf, vsqrtxGHIJ, vhalfrsqrtxGHIJ); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x32() 110 vsqrtxGHIJ = vfmaq_f32(vsqrtxGHIJ, vresidualGHIJ, vsqrtxGHIJ); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x32() 122 const float32x4_t vadjustmentGHIJ = vfmsq_f32(vxGHIJ, vsqrtxGHIJ, vsqrtxGHIJ); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x32() 131 const float32x4_t vyGHIJ = vfmaq_f32(vsqrtxGHIJ, vhalfrsqrtxGHIJ, vadjustmentGHIJ); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x32()
|
D | neonfma-nr1rsqrts1fma1adj-x36.c | 88 float32x4_t vsqrtxGHIJ = vmulq_f32(vrsqrtxGHIJ, vxGHIJ); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x36() local 103 const float32x4_t vresidualGHIJ = vfmsq_f32(vhalf, vsqrtxGHIJ, vhalfrsqrtxGHIJ); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x36() 118 vsqrtxGHIJ = vfmaq_f32(vsqrtxGHIJ, vresidualGHIJ, vsqrtxGHIJ); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x36() 132 const float32x4_t vadjustmentGHIJ = vfmsq_f32(vxGHIJ, vsqrtxGHIJ, vsqrtxGHIJ); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x36() 142 const float32x4_t vyGHIJ = vfmaq_f32(vsqrtxGHIJ, vhalfrsqrtxGHIJ, vadjustmentGHIJ); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x36()
|
D | neonfma-nr1rsqrts1fma1adj-x40.c | 93 float32x4_t vsqrtxGHIJ = vmulq_f32(vrsqrtxGHIJ, vxGHIJ); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x40() local 110 const float32x4_t vresidualGHIJ = vfmsq_f32(vhalf, vsqrtxGHIJ, vhalfrsqrtxGHIJ); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x40() 126 vsqrtxGHIJ = vfmaq_f32(vsqrtxGHIJ, vresidualGHIJ, vsqrtxGHIJ); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x40() 142 const float32x4_t vadjustmentGHIJ = vfmsq_f32(vxGHIJ, vsqrtxGHIJ, vsqrtxGHIJ); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x40() 153 const float32x4_t vyGHIJ = vfmaq_f32(vsqrtxGHIJ, vhalfrsqrtxGHIJ, vadjustmentGHIJ); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x40()
|