/external/XNNPACK/src/f32-vsqrt/gen/ |
D | neonfma-nr2fma1adj-x24.c | 54 float32x4_t vsqrtxKLMN = vmulq_f32(vrsqrtxKLMN, vxKLMN); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x24() local 62 float32x4_t vresidualKLMN = vfmsq_f32(vhalf, vsqrtxKLMN, vhalfrsqrtxKLMN); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x24() 75 vsqrtxKLMN = vfmaq_f32(vsqrtxKLMN, vresidualKLMN, vsqrtxKLMN); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x24() 82 vresidualKLMN = vfmsq_f32(vhalf, vsqrtxKLMN, vhalfrsqrtxKLMN); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x24() 95 vsqrtxKLMN = vfmaq_f32(vsqrtxKLMN, vresidualKLMN, vsqrtxKLMN); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x24() 102 const float32x4_t vadjustmentKLMN = vfmsq_f32(vxKLMN, vsqrtxKLMN, vsqrtxKLMN); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x24() 109 const float32x4_t vyKLMN = vfmaq_f32(vsqrtxKLMN, vhalfrsqrtxKLMN, vadjustmentKLMN); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x24()
|
D | neonfma-nr2fma1adj-x28.c | 56 float32x4_t vsqrtxKLMN = vmulq_f32(vrsqrtxKLMN, vxKLMN); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x28() local 66 float32x4_t vresidualKLMN = vfmsq_f32(vhalf, vsqrtxKLMN, vhalfrsqrtxKLMN); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x28() 80 vsqrtxKLMN = vfmaq_f32(vsqrtxKLMN, vresidualKLMN, vsqrtxKLMN); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x28() 89 vresidualKLMN = vfmsq_f32(vhalf, vsqrtxKLMN, vhalfrsqrtxKLMN); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x28() 103 vsqrtxKLMN = vfmaq_f32(vsqrtxKLMN, vresidualKLMN, vsqrtxKLMN); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x28() 112 const float32x4_t vadjustmentKLMN = vfmsq_f32(vxKLMN, vsqrtxKLMN, vsqrtxKLMN); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x28() 120 const float32x4_t vyKLMN = vfmaq_f32(vsqrtxKLMN, vhalfrsqrtxKLMN, vadjustmentKLMN); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x28()
|
D | neonfma-nr2fma1adj-x32.c | 58 float32x4_t vsqrtxKLMN = vmulq_f32(vrsqrtxKLMN, vxKLMN); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x32() local 70 float32x4_t vresidualKLMN = vfmsq_f32(vhalf, vsqrtxKLMN, vhalfrsqrtxKLMN); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x32() 85 vsqrtxKLMN = vfmaq_f32(vsqrtxKLMN, vresidualKLMN, vsqrtxKLMN); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x32() 96 vresidualKLMN = vfmsq_f32(vhalf, vsqrtxKLMN, vhalfrsqrtxKLMN); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x32() 111 vsqrtxKLMN = vfmaq_f32(vsqrtxKLMN, vresidualKLMN, vsqrtxKLMN); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x32() 122 const float32x4_t vadjustmentKLMN = vfmsq_f32(vxKLMN, vsqrtxKLMN, vsqrtxKLMN); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x32() 131 const float32x4_t vyKLMN = vfmaq_f32(vsqrtxKLMN, vhalfrsqrtxKLMN, vadjustmentKLMN); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x32()
|
D | neonfma-nr2fma1adj-x36.c | 60 float32x4_t vsqrtxKLMN = vmulq_f32(vrsqrtxKLMN, vxKLMN); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x36() local 74 float32x4_t vresidualKLMN = vfmsq_f32(vhalf, vsqrtxKLMN, vhalfrsqrtxKLMN); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x36() 90 vsqrtxKLMN = vfmaq_f32(vsqrtxKLMN, vresidualKLMN, vsqrtxKLMN); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x36() 103 vresidualKLMN = vfmsq_f32(vhalf, vsqrtxKLMN, vhalfrsqrtxKLMN); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x36() 119 vsqrtxKLMN = vfmaq_f32(vsqrtxKLMN, vresidualKLMN, vsqrtxKLMN); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x36() 132 const float32x4_t vadjustmentKLMN = vfmsq_f32(vxKLMN, vsqrtxKLMN, vsqrtxKLMN); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x36() 142 const float32x4_t vyKLMN = vfmaq_f32(vsqrtxKLMN, vhalfrsqrtxKLMN, vadjustmentKLMN); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x36()
|
D | neonfma-nr2fma1adj-x40.c | 62 float32x4_t vsqrtxKLMN = vmulq_f32(vrsqrtxKLMN, vxKLMN); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x40() local 78 float32x4_t vresidualKLMN = vfmsq_f32(vhalf, vsqrtxKLMN, vhalfrsqrtxKLMN); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x40() 95 vsqrtxKLMN = vfmaq_f32(vsqrtxKLMN, vresidualKLMN, vsqrtxKLMN); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x40() 110 vresidualKLMN = vfmsq_f32(vhalf, vsqrtxKLMN, vhalfrsqrtxKLMN); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x40() 127 vsqrtxKLMN = vfmaq_f32(vsqrtxKLMN, vresidualKLMN, vsqrtxKLMN); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x40() 142 const float32x4_t vadjustmentKLMN = vfmsq_f32(vxKLMN, vsqrtxKLMN, vsqrtxKLMN); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x40() 153 const float32x4_t vyKLMN = vfmaq_f32(vsqrtxKLMN, vhalfrsqrtxKLMN, vadjustmentKLMN); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x40()
|
D | neonfma-nr1rsqrts1fma1adj-x24.c | 75 float32x4_t vsqrtxKLMN = vmulq_f32(vrsqrtxKLMN, vxKLMN); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x24() local 83 const float32x4_t vresidualKLMN = vfmsq_f32(vhalf, vsqrtxKLMN, vhalfrsqrtxKLMN); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x24() 96 vsqrtxKLMN = vfmaq_f32(vsqrtxKLMN, vresidualKLMN, vsqrtxKLMN); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x24() 103 const float32x4_t vadjustmentKLMN = vfmsq_f32(vxKLMN, vsqrtxKLMN, vsqrtxKLMN); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x24() 110 const float32x4_t vyKLMN = vfmaq_f32(vsqrtxKLMN, vhalfrsqrtxKLMN, vadjustmentKLMN); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x24()
|
D | neonfma-nr1rsqrts1fma1adj-x28.c | 80 float32x4_t vsqrtxKLMN = vmulq_f32(vrsqrtxKLMN, vxKLMN); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x28() local 90 const float32x4_t vresidualKLMN = vfmsq_f32(vhalf, vsqrtxKLMN, vhalfrsqrtxKLMN); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x28() 104 vsqrtxKLMN = vfmaq_f32(vsqrtxKLMN, vresidualKLMN, vsqrtxKLMN); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x28() 113 const float32x4_t vadjustmentKLMN = vfmsq_f32(vxKLMN, vsqrtxKLMN, vsqrtxKLMN); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x28() 121 const float32x4_t vyKLMN = vfmaq_f32(vsqrtxKLMN, vhalfrsqrtxKLMN, vadjustmentKLMN); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x28()
|
D | neonfma-nr1rsqrts1fma1adj-x32.c | 85 float32x4_t vsqrtxKLMN = vmulq_f32(vrsqrtxKLMN, vxKLMN); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x32() local 97 const float32x4_t vresidualKLMN = vfmsq_f32(vhalf, vsqrtxKLMN, vhalfrsqrtxKLMN); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x32() 112 vsqrtxKLMN = vfmaq_f32(vsqrtxKLMN, vresidualKLMN, vsqrtxKLMN); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x32() 123 const float32x4_t vadjustmentKLMN = vfmsq_f32(vxKLMN, vsqrtxKLMN, vsqrtxKLMN); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x32() 132 const float32x4_t vyKLMN = vfmaq_f32(vsqrtxKLMN, vhalfrsqrtxKLMN, vadjustmentKLMN); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x32()
|
D | neonfma-nr1rsqrts1fma1adj-x36.c | 90 float32x4_t vsqrtxKLMN = vmulq_f32(vrsqrtxKLMN, vxKLMN); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x36() local 104 const float32x4_t vresidualKLMN = vfmsq_f32(vhalf, vsqrtxKLMN, vhalfrsqrtxKLMN); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x36() 120 vsqrtxKLMN = vfmaq_f32(vsqrtxKLMN, vresidualKLMN, vsqrtxKLMN); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x36() 133 const float32x4_t vadjustmentKLMN = vfmsq_f32(vxKLMN, vsqrtxKLMN, vsqrtxKLMN); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x36() 143 const float32x4_t vyKLMN = vfmaq_f32(vsqrtxKLMN, vhalfrsqrtxKLMN, vadjustmentKLMN); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x36()
|
D | neonfma-nr1rsqrts1fma1adj-x40.c | 95 float32x4_t vsqrtxKLMN = vmulq_f32(vrsqrtxKLMN, vxKLMN); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x40() local 111 const float32x4_t vresidualKLMN = vfmsq_f32(vhalf, vsqrtxKLMN, vhalfrsqrtxKLMN); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x40() 128 vsqrtxKLMN = vfmaq_f32(vsqrtxKLMN, vresidualKLMN, vsqrtxKLMN); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x40() 143 const float32x4_t vadjustmentKLMN = vfmsq_f32(vxKLMN, vsqrtxKLMN, vsqrtxKLMN); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x40() 154 const float32x4_t vyKLMN = vfmaq_f32(vsqrtxKLMN, vhalfrsqrtxKLMN, vadjustmentKLMN); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x40()
|