/external/XNNPACK/src/f32-vsqrt/gen/ |
D | neonfma-nr2fma1adj-x8.c | 36 float32x4_t vsqrtx0123 = vmulq_f32(vrsqrtx0123, vx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x8() local 41 float32x4_t vresidual0123 = vfmsq_f32(vhalf, vsqrtx0123, vhalfrsqrtx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x8() 45 vsqrtx0123 = vfmaq_f32(vsqrtx0123, vresidual0123, vsqrtx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x8() 49 vresidual0123 = vfmsq_f32(vhalf, vsqrtx0123, vhalfrsqrtx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x8() 53 vsqrtx0123 = vfmaq_f32(vsqrtx0123, vresidual0123, vsqrtx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x8() 57 const float32x4_t vadjustment0123 = vfmsq_f32(vx0123, vsqrtx0123, vsqrtx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x8() 60 const float32x4_t vy0123 = vfmaq_f32(vsqrtx0123, vhalfrsqrtx0123, vadjustment0123); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x8()
|
D | neonfma-nr2fma1adj-x12.c | 38 float32x4_t vsqrtx0123 = vmulq_f32(vrsqrtx0123, vx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x12() local 45 float32x4_t vresidual0123 = vfmsq_f32(vhalf, vsqrtx0123, vhalfrsqrtx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x12() 50 vsqrtx0123 = vfmaq_f32(vsqrtx0123, vresidual0123, vsqrtx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x12() 56 vresidual0123 = vfmsq_f32(vhalf, vsqrtx0123, vhalfrsqrtx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x12() 61 vsqrtx0123 = vfmaq_f32(vsqrtx0123, vresidual0123, vsqrtx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x12() 67 const float32x4_t vadjustment0123 = vfmsq_f32(vx0123, vsqrtx0123, vsqrtx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x12() 71 const float32x4_t vy0123 = vfmaq_f32(vsqrtx0123, vhalfrsqrtx0123, vadjustment0123); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x12()
|
D | neonfma-nr2fma1adj-x16.c | 40 float32x4_t vsqrtx0123 = vmulq_f32(vrsqrtx0123, vx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x16() local 49 float32x4_t vresidual0123 = vfmsq_f32(vhalf, vsqrtx0123, vhalfrsqrtx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x16() 55 vsqrtx0123 = vfmaq_f32(vsqrtx0123, vresidual0123, vsqrtx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x16() 63 vresidual0123 = vfmsq_f32(vhalf, vsqrtx0123, vhalfrsqrtx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x16() 69 vsqrtx0123 = vfmaq_f32(vsqrtx0123, vresidual0123, vsqrtx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x16() 77 const float32x4_t vadjustment0123 = vfmsq_f32(vx0123, vsqrtx0123, vsqrtx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x16() 82 const float32x4_t vy0123 = vfmaq_f32(vsqrtx0123, vhalfrsqrtx0123, vadjustment0123); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x16()
|
D | neonfma-nr2fma1adj-x20.c | 42 float32x4_t vsqrtx0123 = vmulq_f32(vrsqrtx0123, vx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x20() local 53 float32x4_t vresidual0123 = vfmsq_f32(vhalf, vsqrtx0123, vhalfrsqrtx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x20() 60 vsqrtx0123 = vfmaq_f32(vsqrtx0123, vresidual0123, vsqrtx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x20() 70 vresidual0123 = vfmsq_f32(vhalf, vsqrtx0123, vhalfrsqrtx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x20() 77 vsqrtx0123 = vfmaq_f32(vsqrtx0123, vresidual0123, vsqrtx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x20() 87 const float32x4_t vadjustment0123 = vfmsq_f32(vx0123, vsqrtx0123, vsqrtx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x20() 93 const float32x4_t vy0123 = vfmaq_f32(vsqrtx0123, vhalfrsqrtx0123, vadjustment0123); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x20()
|
D | neonfma-nr2fma1adj-x24.c | 44 float32x4_t vsqrtx0123 = vmulq_f32(vrsqrtx0123, vx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x24() local 57 float32x4_t vresidual0123 = vfmsq_f32(vhalf, vsqrtx0123, vhalfrsqrtx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x24() 65 vsqrtx0123 = vfmaq_f32(vsqrtx0123, vresidual0123, vsqrtx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x24() 77 vresidual0123 = vfmsq_f32(vhalf, vsqrtx0123, vhalfrsqrtx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x24() 85 vsqrtx0123 = vfmaq_f32(vsqrtx0123, vresidual0123, vsqrtx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x24() 97 const float32x4_t vadjustment0123 = vfmsq_f32(vx0123, vsqrtx0123, vsqrtx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x24() 104 const float32x4_t vy0123 = vfmaq_f32(vsqrtx0123, vhalfrsqrtx0123, vadjustment0123); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x24()
|
D | neonfma-nr1rsqrts1fma1adj-x8.c | 45 float32x4_t vsqrtx0123 = vmulq_f32(vrsqrtx0123, vx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x8() local 50 const float32x4_t vresidual0123 = vfmsq_f32(vhalf, vsqrtx0123, vhalfrsqrtx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x8() 54 vsqrtx0123 = vfmaq_f32(vsqrtx0123, vresidual0123, vsqrtx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x8() 58 const float32x4_t vadjustment0123 = vfmsq_f32(vx0123, vsqrtx0123, vsqrtx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x8() 61 const float32x4_t vy0123 = vfmaq_f32(vsqrtx0123, vhalfrsqrtx0123, vadjustment0123); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x8()
|
D | neonfma-nr2fma1adj-x28.c | 46 float32x4_t vsqrtx0123 = vmulq_f32(vrsqrtx0123, vx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x28() local 61 float32x4_t vresidual0123 = vfmsq_f32(vhalf, vsqrtx0123, vhalfrsqrtx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x28() 70 vsqrtx0123 = vfmaq_f32(vsqrtx0123, vresidual0123, vsqrtx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x28() 84 vresidual0123 = vfmsq_f32(vhalf, vsqrtx0123, vhalfrsqrtx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x28() 93 vsqrtx0123 = vfmaq_f32(vsqrtx0123, vresidual0123, vsqrtx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x28() 107 const float32x4_t vadjustment0123 = vfmsq_f32(vx0123, vsqrtx0123, vsqrtx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x28() 115 const float32x4_t vy0123 = vfmaq_f32(vsqrtx0123, vhalfrsqrtx0123, vadjustment0123); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x28()
|
D | neonfma-nr1rsqrts1fma1adj-x12.c | 50 float32x4_t vsqrtx0123 = vmulq_f32(vrsqrtx0123, vx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x12() local 57 const float32x4_t vresidual0123 = vfmsq_f32(vhalf, vsqrtx0123, vhalfrsqrtx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x12() 62 vsqrtx0123 = vfmaq_f32(vsqrtx0123, vresidual0123, vsqrtx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x12() 68 const float32x4_t vadjustment0123 = vfmsq_f32(vx0123, vsqrtx0123, vsqrtx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x12() 72 const float32x4_t vy0123 = vfmaq_f32(vsqrtx0123, vhalfrsqrtx0123, vadjustment0123); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x12()
|
D | neonfma-nr2fma1adj-x32.c | 48 float32x4_t vsqrtx0123 = vmulq_f32(vrsqrtx0123, vx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x32() local 65 float32x4_t vresidual0123 = vfmsq_f32(vhalf, vsqrtx0123, vhalfrsqrtx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x32() 75 vsqrtx0123 = vfmaq_f32(vsqrtx0123, vresidual0123, vsqrtx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x32() 91 vresidual0123 = vfmsq_f32(vhalf, vsqrtx0123, vhalfrsqrtx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x32() 101 vsqrtx0123 = vfmaq_f32(vsqrtx0123, vresidual0123, vsqrtx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x32() 117 const float32x4_t vadjustment0123 = vfmsq_f32(vx0123, vsqrtx0123, vsqrtx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x32() 126 const float32x4_t vy0123 = vfmaq_f32(vsqrtx0123, vhalfrsqrtx0123, vadjustment0123); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x32()
|
D | neonfma-nr2fma1adj-x36.c | 50 float32x4_t vsqrtx0123 = vmulq_f32(vrsqrtx0123, vx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x36() local 69 float32x4_t vresidual0123 = vfmsq_f32(vhalf, vsqrtx0123, vhalfrsqrtx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x36() 80 vsqrtx0123 = vfmaq_f32(vsqrtx0123, vresidual0123, vsqrtx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x36() 98 vresidual0123 = vfmsq_f32(vhalf, vsqrtx0123, vhalfrsqrtx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x36() 109 vsqrtx0123 = vfmaq_f32(vsqrtx0123, vresidual0123, vsqrtx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x36() 127 const float32x4_t vadjustment0123 = vfmsq_f32(vx0123, vsqrtx0123, vsqrtx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x36() 137 const float32x4_t vy0123 = vfmaq_f32(vsqrtx0123, vhalfrsqrtx0123, vadjustment0123); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x36()
|
D | neonfma-nr1rsqrts1fma1adj-x16.c | 55 float32x4_t vsqrtx0123 = vmulq_f32(vrsqrtx0123, vx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x16() local 64 const float32x4_t vresidual0123 = vfmsq_f32(vhalf, vsqrtx0123, vhalfrsqrtx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x16() 70 vsqrtx0123 = vfmaq_f32(vsqrtx0123, vresidual0123, vsqrtx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x16() 78 const float32x4_t vadjustment0123 = vfmsq_f32(vx0123, vsqrtx0123, vsqrtx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x16() 83 const float32x4_t vy0123 = vfmaq_f32(vsqrtx0123, vhalfrsqrtx0123, vadjustment0123); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x16()
|
D | neonfma-nr2fma1adj-x40.c | 52 float32x4_t vsqrtx0123 = vmulq_f32(vrsqrtx0123, vx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x40() local 73 float32x4_t vresidual0123 = vfmsq_f32(vhalf, vsqrtx0123, vhalfrsqrtx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x40() 85 vsqrtx0123 = vfmaq_f32(vsqrtx0123, vresidual0123, vsqrtx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x40() 105 vresidual0123 = vfmsq_f32(vhalf, vsqrtx0123, vhalfrsqrtx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x40() 117 vsqrtx0123 = vfmaq_f32(vsqrtx0123, vresidual0123, vsqrtx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x40() 137 const float32x4_t vadjustment0123 = vfmsq_f32(vx0123, vsqrtx0123, vsqrtx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x40() 148 const float32x4_t vy0123 = vfmaq_f32(vsqrtx0123, vhalfrsqrtx0123, vadjustment0123); in xnn_f32_vsqrt_ukernel__neonfma_nr2fma1adj_x40()
|
D | neonfma-nr1rsqrts1fma1adj-x20.c | 60 float32x4_t vsqrtx0123 = vmulq_f32(vrsqrtx0123, vx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x20() local 71 const float32x4_t vresidual0123 = vfmsq_f32(vhalf, vsqrtx0123, vhalfrsqrtx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x20() 78 vsqrtx0123 = vfmaq_f32(vsqrtx0123, vresidual0123, vsqrtx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x20() 88 const float32x4_t vadjustment0123 = vfmsq_f32(vx0123, vsqrtx0123, vsqrtx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x20() 94 const float32x4_t vy0123 = vfmaq_f32(vsqrtx0123, vhalfrsqrtx0123, vadjustment0123); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x20()
|
D | neonfma-nr1rsqrts1fma1adj-x24.c | 65 float32x4_t vsqrtx0123 = vmulq_f32(vrsqrtx0123, vx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x24() local 78 const float32x4_t vresidual0123 = vfmsq_f32(vhalf, vsqrtx0123, vhalfrsqrtx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x24() 86 vsqrtx0123 = vfmaq_f32(vsqrtx0123, vresidual0123, vsqrtx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x24() 98 const float32x4_t vadjustment0123 = vfmsq_f32(vx0123, vsqrtx0123, vsqrtx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x24() 105 const float32x4_t vy0123 = vfmaq_f32(vsqrtx0123, vhalfrsqrtx0123, vadjustment0123); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x24()
|
D | neonfma-nr1rsqrts1fma1adj-x28.c | 70 float32x4_t vsqrtx0123 = vmulq_f32(vrsqrtx0123, vx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x28() local 85 const float32x4_t vresidual0123 = vfmsq_f32(vhalf, vsqrtx0123, vhalfrsqrtx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x28() 94 vsqrtx0123 = vfmaq_f32(vsqrtx0123, vresidual0123, vsqrtx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x28() 108 const float32x4_t vadjustment0123 = vfmsq_f32(vx0123, vsqrtx0123, vsqrtx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x28() 116 const float32x4_t vy0123 = vfmaq_f32(vsqrtx0123, vhalfrsqrtx0123, vadjustment0123); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x28()
|
D | neonfma-nr1rsqrts1fma1adj-x32.c | 75 float32x4_t vsqrtx0123 = vmulq_f32(vrsqrtx0123, vx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x32() local 92 const float32x4_t vresidual0123 = vfmsq_f32(vhalf, vsqrtx0123, vhalfrsqrtx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x32() 102 vsqrtx0123 = vfmaq_f32(vsqrtx0123, vresidual0123, vsqrtx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x32() 118 const float32x4_t vadjustment0123 = vfmsq_f32(vx0123, vsqrtx0123, vsqrtx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x32() 127 const float32x4_t vy0123 = vfmaq_f32(vsqrtx0123, vhalfrsqrtx0123, vadjustment0123); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x32()
|
D | neonfma-nr1rsqrts1fma1adj-x36.c | 80 float32x4_t vsqrtx0123 = vmulq_f32(vrsqrtx0123, vx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x36() local 99 const float32x4_t vresidual0123 = vfmsq_f32(vhalf, vsqrtx0123, vhalfrsqrtx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x36() 110 vsqrtx0123 = vfmaq_f32(vsqrtx0123, vresidual0123, vsqrtx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x36() 128 const float32x4_t vadjustment0123 = vfmsq_f32(vx0123, vsqrtx0123, vsqrtx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x36() 138 const float32x4_t vy0123 = vfmaq_f32(vsqrtx0123, vhalfrsqrtx0123, vadjustment0123); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x36()
|
D | neonfma-nr1rsqrts1fma1adj-x40.c | 85 float32x4_t vsqrtx0123 = vmulq_f32(vrsqrtx0123, vx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x40() local 106 const float32x4_t vresidual0123 = vfmsq_f32(vhalf, vsqrtx0123, vhalfrsqrtx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x40() 118 vsqrtx0123 = vfmaq_f32(vsqrtx0123, vresidual0123, vsqrtx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x40() 138 const float32x4_t vadjustment0123 = vfmsq_f32(vx0123, vsqrtx0123, vsqrtx0123); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x40() 149 const float32x4_t vy0123 = vfmaq_f32(vsqrtx0123, vhalfrsqrtx0123, vadjustment0123); in xnn_f32_vsqrt_ukernel__neonfma_nr1rsqrts1fma1adj_x40()
|