/external/XNNPACK/src/f32-vsqrt/gen/ |
D | neonfma-nr2fma1adj-x12.c | 47 float32x4_t vresidual89AB = vfmsq_f32(vhalf, vsqrtx89AB, vhalfrsqrtx89AB); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x12() local 53 vhalfrsqrtx89AB = vfmaq_f32(vhalfrsqrtx89AB, vresidual89AB, vhalfrsqrtx89AB); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x12() 54 vsqrtx89AB = vfmaq_f32(vsqrtx89AB, vresidual89AB, vsqrtx89AB); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x12() 58 vresidual89AB = vfmsq_f32(vhalf, vsqrtx89AB, vhalfrsqrtx89AB); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x12() 64 vhalfrsqrtx89AB = vfmaq_f32(vhalfrsqrtx89AB, vresidual89AB, vhalfrsqrtx89AB); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x12() 65 vsqrtx89AB = vfmaq_f32(vsqrtx89AB, vresidual89AB, vsqrtx89AB); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x12()
|
D | neonfma-nr2fma1adj-x16.c | 51 float32x4_t vresidual89AB = vfmsq_f32(vhalf, vsqrtx89AB, vhalfrsqrtx89AB); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x16() local 58 vhalfrsqrtx89AB = vfmaq_f32(vhalfrsqrtx89AB, vresidual89AB, vhalfrsqrtx89AB); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x16() 59 vsqrtx89AB = vfmaq_f32(vsqrtx89AB, vresidual89AB, vsqrtx89AB); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x16() 65 vresidual89AB = vfmsq_f32(vhalf, vsqrtx89AB, vhalfrsqrtx89AB); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x16() 72 vhalfrsqrtx89AB = vfmaq_f32(vhalfrsqrtx89AB, vresidual89AB, vhalfrsqrtx89AB); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x16() 73 vsqrtx89AB = vfmaq_f32(vsqrtx89AB, vresidual89AB, vsqrtx89AB); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x16()
|
D | neonfma-nr2fma1adj-x20.c | 55 float32x4_t vresidual89AB = vfmsq_f32(vhalf, vsqrtx89AB, vhalfrsqrtx89AB); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x20() local 63 vhalfrsqrtx89AB = vfmaq_f32(vhalfrsqrtx89AB, vresidual89AB, vhalfrsqrtx89AB); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x20() 64 vsqrtx89AB = vfmaq_f32(vsqrtx89AB, vresidual89AB, vsqrtx89AB); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x20() 72 vresidual89AB = vfmsq_f32(vhalf, vsqrtx89AB, vhalfrsqrtx89AB); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x20() 80 vhalfrsqrtx89AB = vfmaq_f32(vhalfrsqrtx89AB, vresidual89AB, vhalfrsqrtx89AB); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x20() 81 vsqrtx89AB = vfmaq_f32(vsqrtx89AB, vresidual89AB, vsqrtx89AB); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x20()
|
D | neonfma-nr2fma1adj-x24.c | 59 float32x4_t vresidual89AB = vfmsq_f32(vhalf, vsqrtx89AB, vhalfrsqrtx89AB); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x24() local 68 vhalfrsqrtx89AB = vfmaq_f32(vhalfrsqrtx89AB, vresidual89AB, vhalfrsqrtx89AB); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x24() 69 vsqrtx89AB = vfmaq_f32(vsqrtx89AB, vresidual89AB, vsqrtx89AB); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x24() 79 vresidual89AB = vfmsq_f32(vhalf, vsqrtx89AB, vhalfrsqrtx89AB); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x24() 88 vhalfrsqrtx89AB = vfmaq_f32(vhalfrsqrtx89AB, vresidual89AB, vhalfrsqrtx89AB); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x24() 89 vsqrtx89AB = vfmaq_f32(vsqrtx89AB, vresidual89AB, vsqrtx89AB); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x24()
|
D | neonfma-nr2fma1adj-x28.c | 63 float32x4_t vresidual89AB = vfmsq_f32(vhalf, vsqrtx89AB, vhalfrsqrtx89AB); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x28() local 73 vhalfrsqrtx89AB = vfmaq_f32(vhalfrsqrtx89AB, vresidual89AB, vhalfrsqrtx89AB); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x28() 74 vsqrtx89AB = vfmaq_f32(vsqrtx89AB, vresidual89AB, vsqrtx89AB); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x28() 86 vresidual89AB = vfmsq_f32(vhalf, vsqrtx89AB, vhalfrsqrtx89AB); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x28() 96 vhalfrsqrtx89AB = vfmaq_f32(vhalfrsqrtx89AB, vresidual89AB, vhalfrsqrtx89AB); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x28() 97 vsqrtx89AB = vfmaq_f32(vsqrtx89AB, vresidual89AB, vsqrtx89AB); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x28()
|
D | neonfma-nr2fma1adj-x32.c | 67 float32x4_t vresidual89AB = vfmsq_f32(vhalf, vsqrtx89AB, vhalfrsqrtx89AB); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x32() local 78 vhalfrsqrtx89AB = vfmaq_f32(vhalfrsqrtx89AB, vresidual89AB, vhalfrsqrtx89AB); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x32() 79 vsqrtx89AB = vfmaq_f32(vsqrtx89AB, vresidual89AB, vsqrtx89AB); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x32() 93 vresidual89AB = vfmsq_f32(vhalf, vsqrtx89AB, vhalfrsqrtx89AB); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x32() 104 vhalfrsqrtx89AB = vfmaq_f32(vhalfrsqrtx89AB, vresidual89AB, vhalfrsqrtx89AB); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x32() 105 vsqrtx89AB = vfmaq_f32(vsqrtx89AB, vresidual89AB, vsqrtx89AB); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x32()
|
D | neonfma-nr2fma1adj-x36.c | 71 float32x4_t vresidual89AB = vfmsq_f32(vhalf, vsqrtx89AB, vhalfrsqrtx89AB); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x36() local 83 vhalfrsqrtx89AB = vfmaq_f32(vhalfrsqrtx89AB, vresidual89AB, vhalfrsqrtx89AB); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x36() 84 vsqrtx89AB = vfmaq_f32(vsqrtx89AB, vresidual89AB, vsqrtx89AB); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x36() 100 vresidual89AB = vfmsq_f32(vhalf, vsqrtx89AB, vhalfrsqrtx89AB); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x36() 112 vhalfrsqrtx89AB = vfmaq_f32(vhalfrsqrtx89AB, vresidual89AB, vhalfrsqrtx89AB); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x36() 113 vsqrtx89AB = vfmaq_f32(vsqrtx89AB, vresidual89AB, vsqrtx89AB); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x36()
|
D | neonfma-nr1rsqrts1fma1adj-x12.c | 59 const float32x4_t vresidual89AB = vfmsq_f32(vhalf, vsqrtx89AB, vhalfrsqrtx89AB); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x12() local 65 vhalfrsqrtx89AB = vfmaq_f32(vhalfrsqrtx89AB, vresidual89AB, vhalfrsqrtx89AB); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x12() 66 vsqrtx89AB = vfmaq_f32(vsqrtx89AB, vresidual89AB, vsqrtx89AB); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x12()
|
D | neonfma-nr2fma1adj-x40.c | 75 float32x4_t vresidual89AB = vfmsq_f32(vhalf, vsqrtx89AB, vhalfrsqrtx89AB); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x40() local 88 vhalfrsqrtx89AB = vfmaq_f32(vhalfrsqrtx89AB, vresidual89AB, vhalfrsqrtx89AB); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x40() 89 vsqrtx89AB = vfmaq_f32(vsqrtx89AB, vresidual89AB, vsqrtx89AB); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x40() 107 vresidual89AB = vfmsq_f32(vhalf, vsqrtx89AB, vhalfrsqrtx89AB); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x40() 120 vhalfrsqrtx89AB = vfmaq_f32(vhalfrsqrtx89AB, vresidual89AB, vhalfrsqrtx89AB); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x40() 121 vsqrtx89AB = vfmaq_f32(vsqrtx89AB, vresidual89AB, vsqrtx89AB); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x40()
|
D | neonfma-nr1rsqrts1fma1adj-x16.c | 66 const float32x4_t vresidual89AB = vfmsq_f32(vhalf, vsqrtx89AB, vhalfrsqrtx89AB); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x16() local 73 vhalfrsqrtx89AB = vfmaq_f32(vhalfrsqrtx89AB, vresidual89AB, vhalfrsqrtx89AB); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x16() 74 vsqrtx89AB = vfmaq_f32(vsqrtx89AB, vresidual89AB, vsqrtx89AB); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x16()
|
D | neonfma-nr1rsqrts1fma1adj-x20.c | 73 const float32x4_t vresidual89AB = vfmsq_f32(vhalf, vsqrtx89AB, vhalfrsqrtx89AB); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x20() local 81 vhalfrsqrtx89AB = vfmaq_f32(vhalfrsqrtx89AB, vresidual89AB, vhalfrsqrtx89AB); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x20() 82 vsqrtx89AB = vfmaq_f32(vsqrtx89AB, vresidual89AB, vsqrtx89AB); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x20()
|
D | neonfma-nr1rsqrts1fma1adj-x24.c | 80 const float32x4_t vresidual89AB = vfmsq_f32(vhalf, vsqrtx89AB, vhalfrsqrtx89AB); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x24() local 89 vhalfrsqrtx89AB = vfmaq_f32(vhalfrsqrtx89AB, vresidual89AB, vhalfrsqrtx89AB); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x24() 90 vsqrtx89AB = vfmaq_f32(vsqrtx89AB, vresidual89AB, vsqrtx89AB); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x24()
|
D | neonfma-nr1rsqrts1fma1adj-x28.c | 87 const float32x4_t vresidual89AB = vfmsq_f32(vhalf, vsqrtx89AB, vhalfrsqrtx89AB); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x28() local 97 vhalfrsqrtx89AB = vfmaq_f32(vhalfrsqrtx89AB, vresidual89AB, vhalfrsqrtx89AB); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x28() 98 vsqrtx89AB = vfmaq_f32(vsqrtx89AB, vresidual89AB, vsqrtx89AB); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x28()
|
D | neonfma-nr1rsqrts1fma1adj-x32.c | 94 const float32x4_t vresidual89AB = vfmsq_f32(vhalf, vsqrtx89AB, vhalfrsqrtx89AB); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x32() local 105 vhalfrsqrtx89AB = vfmaq_f32(vhalfrsqrtx89AB, vresidual89AB, vhalfrsqrtx89AB); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x32() 106 vsqrtx89AB = vfmaq_f32(vsqrtx89AB, vresidual89AB, vsqrtx89AB); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x32()
|
D | neonfma-nr1rsqrts1fma1adj-x36.c | 101 const float32x4_t vresidual89AB = vfmsq_f32(vhalf, vsqrtx89AB, vhalfrsqrtx89AB); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x36() local 113 vhalfrsqrtx89AB = vfmaq_f32(vhalfrsqrtx89AB, vresidual89AB, vhalfrsqrtx89AB); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x36() 114 vsqrtx89AB = vfmaq_f32(vsqrtx89AB, vresidual89AB, vsqrtx89AB); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x36()
|
D | neonfma-nr1rsqrts1fma1adj-x40.c | 108 const float32x4_t vresidual89AB = vfmsq_f32(vhalf, vsqrtx89AB, vhalfrsqrtx89AB); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x40() local 121 vhalfrsqrtx89AB = vfmaq_f32(vhalfrsqrtx89AB, vresidual89AB, vhalfrsqrtx89AB); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x40() 122 vsqrtx89AB = vfmaq_f32(vsqrtx89AB, vresidual89AB, vsqrtx89AB); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x40()
|