/external/XNNPACK/src/f32-vsqrt/gen/ |
D | neonfma-nr2fma1adj-x8.c | 37 float32x4_t vhalfrsqrtx0123 = vmulq_f32(vrsqrtx0123, vhalf); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x8() local 41 float32x4_t vresidual0123 = vfmsq_f32(vhalf, vsqrtx0123, vhalfrsqrtx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x8() 44 vhalfrsqrtx0123 = vfmaq_f32(vhalfrsqrtx0123, vresidual0123, vhalfrsqrtx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x8() 49 vresidual0123 = vfmsq_f32(vhalf, vsqrtx0123, vhalfrsqrtx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x8() 52 vhalfrsqrtx0123 = vfmaq_f32(vhalfrsqrtx0123, vresidual0123, vhalfrsqrtx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x8() 60 const float32x4_t vy0123 = vfmaq_f32(vsqrtx0123, vhalfrsqrtx0123, vadjustment0123); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x8()
|
D | neonfma-nr2fma1adj-x12.c | 39 float32x4_t vhalfrsqrtx0123 = vmulq_f32(vrsqrtx0123, vhalf); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x12() local 45 float32x4_t vresidual0123 = vfmsq_f32(vhalf, vsqrtx0123, vhalfrsqrtx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x12() 49 vhalfrsqrtx0123 = vfmaq_f32(vhalfrsqrtx0123, vresidual0123, vhalfrsqrtx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x12() 56 vresidual0123 = vfmsq_f32(vhalf, vsqrtx0123, vhalfrsqrtx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x12() 60 vhalfrsqrtx0123 = vfmaq_f32(vhalfrsqrtx0123, vresidual0123, vhalfrsqrtx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x12() 71 const float32x4_t vy0123 = vfmaq_f32(vsqrtx0123, vhalfrsqrtx0123, vadjustment0123); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x12()
|
D | neonfma-nr2fma1adj-x16.c | 41 float32x4_t vhalfrsqrtx0123 = vmulq_f32(vrsqrtx0123, vhalf); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x16() local 49 float32x4_t vresidual0123 = vfmsq_f32(vhalf, vsqrtx0123, vhalfrsqrtx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x16() 54 vhalfrsqrtx0123 = vfmaq_f32(vhalfrsqrtx0123, vresidual0123, vhalfrsqrtx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x16() 63 vresidual0123 = vfmsq_f32(vhalf, vsqrtx0123, vhalfrsqrtx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x16() 68 vhalfrsqrtx0123 = vfmaq_f32(vhalfrsqrtx0123, vresidual0123, vhalfrsqrtx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x16() 82 const float32x4_t vy0123 = vfmaq_f32(vsqrtx0123, vhalfrsqrtx0123, vadjustment0123); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x16()
|
D | neonfma-nr2fma1adj-x20.c | 43 float32x4_t vhalfrsqrtx0123 = vmulq_f32(vrsqrtx0123, vhalf); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x20() local 53 float32x4_t vresidual0123 = vfmsq_f32(vhalf, vsqrtx0123, vhalfrsqrtx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x20() 59 vhalfrsqrtx0123 = vfmaq_f32(vhalfrsqrtx0123, vresidual0123, vhalfrsqrtx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x20() 70 vresidual0123 = vfmsq_f32(vhalf, vsqrtx0123, vhalfrsqrtx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x20() 76 vhalfrsqrtx0123 = vfmaq_f32(vhalfrsqrtx0123, vresidual0123, vhalfrsqrtx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x20() 93 const float32x4_t vy0123 = vfmaq_f32(vsqrtx0123, vhalfrsqrtx0123, vadjustment0123); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x20()
|
D | neonfma-nr1rsqrts1fma1adj-x8.c | 46 float32x4_t vhalfrsqrtx0123 = vmulq_f32(vrsqrtx0123, vhalf); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x8() local 50 const float32x4_t vresidual0123 = vfmsq_f32(vhalf, vsqrtx0123, vhalfrsqrtx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x8() 53 vhalfrsqrtx0123 = vfmaq_f32(vhalfrsqrtx0123, vresidual0123, vhalfrsqrtx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x8() 61 const float32x4_t vy0123 = vfmaq_f32(vsqrtx0123, vhalfrsqrtx0123, vadjustment0123); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x8()
|
D | neonfma-nr2fma1adj-x24.c | 45 float32x4_t vhalfrsqrtx0123 = vmulq_f32(vrsqrtx0123, vhalf); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x24() local 57 float32x4_t vresidual0123 = vfmsq_f32(vhalf, vsqrtx0123, vhalfrsqrtx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x24() 64 vhalfrsqrtx0123 = vfmaq_f32(vhalfrsqrtx0123, vresidual0123, vhalfrsqrtx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x24() 77 vresidual0123 = vfmsq_f32(vhalf, vsqrtx0123, vhalfrsqrtx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x24() 84 vhalfrsqrtx0123 = vfmaq_f32(vhalfrsqrtx0123, vresidual0123, vhalfrsqrtx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x24() 104 const float32x4_t vy0123 = vfmaq_f32(vsqrtx0123, vhalfrsqrtx0123, vadjustment0123); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x24()
|
D | neonfma-nr2fma1adj-x28.c | 47 float32x4_t vhalfrsqrtx0123 = vmulq_f32(vrsqrtx0123, vhalf); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x28() local 61 float32x4_t vresidual0123 = vfmsq_f32(vhalf, vsqrtx0123, vhalfrsqrtx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x28() 69 vhalfrsqrtx0123 = vfmaq_f32(vhalfrsqrtx0123, vresidual0123, vhalfrsqrtx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x28() 84 vresidual0123 = vfmsq_f32(vhalf, vsqrtx0123, vhalfrsqrtx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x28() 92 vhalfrsqrtx0123 = vfmaq_f32(vhalfrsqrtx0123, vresidual0123, vhalfrsqrtx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x28() 115 const float32x4_t vy0123 = vfmaq_f32(vsqrtx0123, vhalfrsqrtx0123, vadjustment0123); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x28()
|
D | neonfma-nr1rsqrts1fma1adj-x12.c | 51 float32x4_t vhalfrsqrtx0123 = vmulq_f32(vrsqrtx0123, vhalf); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x12() local 57 const float32x4_t vresidual0123 = vfmsq_f32(vhalf, vsqrtx0123, vhalfrsqrtx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x12() 61 vhalfrsqrtx0123 = vfmaq_f32(vhalfrsqrtx0123, vresidual0123, vhalfrsqrtx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x12() 72 const float32x4_t vy0123 = vfmaq_f32(vsqrtx0123, vhalfrsqrtx0123, vadjustment0123); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x12()
|
D | neonfma-nr2fma1adj-x32.c | 49 float32x4_t vhalfrsqrtx0123 = vmulq_f32(vrsqrtx0123, vhalf); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x32() local 65 float32x4_t vresidual0123 = vfmsq_f32(vhalf, vsqrtx0123, vhalfrsqrtx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x32() 74 vhalfrsqrtx0123 = vfmaq_f32(vhalfrsqrtx0123, vresidual0123, vhalfrsqrtx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x32() 91 vresidual0123 = vfmsq_f32(vhalf, vsqrtx0123, vhalfrsqrtx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x32() 100 vhalfrsqrtx0123 = vfmaq_f32(vhalfrsqrtx0123, vresidual0123, vhalfrsqrtx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x32() 126 const float32x4_t vy0123 = vfmaq_f32(vsqrtx0123, vhalfrsqrtx0123, vadjustment0123); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x32()
|
D | neonfma-nr2fma1adj-x36.c | 51 float32x4_t vhalfrsqrtx0123 = vmulq_f32(vrsqrtx0123, vhalf); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x36() local 69 float32x4_t vresidual0123 = vfmsq_f32(vhalf, vsqrtx0123, vhalfrsqrtx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x36() 79 vhalfrsqrtx0123 = vfmaq_f32(vhalfrsqrtx0123, vresidual0123, vhalfrsqrtx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x36() 98 vresidual0123 = vfmsq_f32(vhalf, vsqrtx0123, vhalfrsqrtx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x36() 108 vhalfrsqrtx0123 = vfmaq_f32(vhalfrsqrtx0123, vresidual0123, vhalfrsqrtx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x36() 137 const float32x4_t vy0123 = vfmaq_f32(vsqrtx0123, vhalfrsqrtx0123, vadjustment0123); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x36()
|
D | neonfma-nr1rsqrts1fma1adj-x16.c | 56 float32x4_t vhalfrsqrtx0123 = vmulq_f32(vrsqrtx0123, vhalf); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x16() local 64 const float32x4_t vresidual0123 = vfmsq_f32(vhalf, vsqrtx0123, vhalfrsqrtx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x16() 69 vhalfrsqrtx0123 = vfmaq_f32(vhalfrsqrtx0123, vresidual0123, vhalfrsqrtx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x16() 83 const float32x4_t vy0123 = vfmaq_f32(vsqrtx0123, vhalfrsqrtx0123, vadjustment0123); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x16()
|
D | neonfma-nr2fma1adj-x40.c | 53 float32x4_t vhalfrsqrtx0123 = vmulq_f32(vrsqrtx0123, vhalf); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x40() local 73 float32x4_t vresidual0123 = vfmsq_f32(vhalf, vsqrtx0123, vhalfrsqrtx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x40() 84 vhalfrsqrtx0123 = vfmaq_f32(vhalfrsqrtx0123, vresidual0123, vhalfrsqrtx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x40() 105 vresidual0123 = vfmsq_f32(vhalf, vsqrtx0123, vhalfrsqrtx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x40() 116 vhalfrsqrtx0123 = vfmaq_f32(vhalfrsqrtx0123, vresidual0123, vhalfrsqrtx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x40() 148 const float32x4_t vy0123 = vfmaq_f32(vsqrtx0123, vhalfrsqrtx0123, vadjustment0123); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x40()
|
D | neonfma-nr1rsqrts1fma1adj-x20.c | 61 float32x4_t vhalfrsqrtx0123 = vmulq_f32(vrsqrtx0123, vhalf); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x20() local 71 const float32x4_t vresidual0123 = vfmsq_f32(vhalf, vsqrtx0123, vhalfrsqrtx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x20() 77 vhalfrsqrtx0123 = vfmaq_f32(vhalfrsqrtx0123, vresidual0123, vhalfrsqrtx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x20() 94 const float32x4_t vy0123 = vfmaq_f32(vsqrtx0123, vhalfrsqrtx0123, vadjustment0123); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x20()
|
D | neonfma-nr1rsqrts1fma1adj-x24.c | 66 float32x4_t vhalfrsqrtx0123 = vmulq_f32(vrsqrtx0123, vhalf); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x24() local 78 const float32x4_t vresidual0123 = vfmsq_f32(vhalf, vsqrtx0123, vhalfrsqrtx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x24() 85 vhalfrsqrtx0123 = vfmaq_f32(vhalfrsqrtx0123, vresidual0123, vhalfrsqrtx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x24() 105 const float32x4_t vy0123 = vfmaq_f32(vsqrtx0123, vhalfrsqrtx0123, vadjustment0123); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x24()
|
D | neonfma-nr1rsqrts1fma1adj-x28.c | 71 float32x4_t vhalfrsqrtx0123 = vmulq_f32(vrsqrtx0123, vhalf); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x28() local 85 const float32x4_t vresidual0123 = vfmsq_f32(vhalf, vsqrtx0123, vhalfrsqrtx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x28() 93 vhalfrsqrtx0123 = vfmaq_f32(vhalfrsqrtx0123, vresidual0123, vhalfrsqrtx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x28() 116 const float32x4_t vy0123 = vfmaq_f32(vsqrtx0123, vhalfrsqrtx0123, vadjustment0123); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x28()
|
D | neonfma-nr1rsqrts1fma1adj-x32.c | 76 float32x4_t vhalfrsqrtx0123 = vmulq_f32(vrsqrtx0123, vhalf); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x32() local 92 const float32x4_t vresidual0123 = vfmsq_f32(vhalf, vsqrtx0123, vhalfrsqrtx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x32() 101 vhalfrsqrtx0123 = vfmaq_f32(vhalfrsqrtx0123, vresidual0123, vhalfrsqrtx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x32() 127 const float32x4_t vy0123 = vfmaq_f32(vsqrtx0123, vhalfrsqrtx0123, vadjustment0123); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x32()
|
D | neonfma-nr1rsqrts1fma1adj-x36.c | 81 float32x4_t vhalfrsqrtx0123 = vmulq_f32(vrsqrtx0123, vhalf); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x36() local 99 const float32x4_t vresidual0123 = vfmsq_f32(vhalf, vsqrtx0123, vhalfrsqrtx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x36() 109 vhalfrsqrtx0123 = vfmaq_f32(vhalfrsqrtx0123, vresidual0123, vhalfrsqrtx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x36() 138 const float32x4_t vy0123 = vfmaq_f32(vsqrtx0123, vhalfrsqrtx0123, vadjustment0123); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x36()
|
D | neonfma-nr1rsqrts1fma1adj-x40.c | 86 float32x4_t vhalfrsqrtx0123 = vmulq_f32(vrsqrtx0123, vhalf); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x40() local 106 const float32x4_t vresidual0123 = vfmsq_f32(vhalf, vsqrtx0123, vhalfrsqrtx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x40() 117 vhalfrsqrtx0123 = vfmaq_f32(vhalfrsqrtx0123, vresidual0123, vhalfrsqrtx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x40() 149 const float32x4_t vy0123 = vfmaq_f32(vsqrtx0123, vhalfrsqrtx0123, vadjustment0123); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x40()
|