/external/XNNPACK/src/f32-velu/gen/ |
D | velu-neonfma-rr1-lut16-p3-x24.c | 66 …const uint64x2_t vidx0123 = vreinterpretq_u64_s32(vshlq_n_s32(vandq_s32(vreinterpretq_s32_f32(vn01… in xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x24() 67 const int32x4_t ven0123 = vshlq_n_s32(vreinterpretq_s32_f32(vn0123), 19); in xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x24() 68 …const uint64x2_t vidx4567 = vreinterpretq_u64_s32(vshlq_n_s32(vandq_s32(vreinterpretq_s32_f32(vn45… in xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x24() 69 const int32x4_t ven4567 = vshlq_n_s32(vreinterpretq_s32_f32(vn4567), 19); in xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x24() 70 …const uint64x2_t vidx89AB = vreinterpretq_u64_s32(vshlq_n_s32(vandq_s32(vreinterpretq_s32_f32(vn89… in xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x24() 71 const int32x4_t ven89AB = vshlq_n_s32(vreinterpretq_s32_f32(vn89AB), 19); in xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x24() 72 …const uint64x2_t vidxCDEF = vreinterpretq_u64_s32(vshlq_n_s32(vandq_s32(vreinterpretq_s32_f32(vnCD… in xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x24() 73 const int32x4_t venCDEF = vshlq_n_s32(vreinterpretq_s32_f32(vnCDEF), 19); in xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x24() 74 …const uint64x2_t vidxGHIJ = vreinterpretq_u64_s32(vshlq_n_s32(vandq_s32(vreinterpretq_s32_f32(vnGH… in xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x24() 75 const int32x4_t venGHIJ = vshlq_n_s32(vreinterpretq_s32_f32(vnGHIJ), 19); in xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x24() [all …]
|
D | velu-neon-rr2-lut16-p3-x24.c | 67 …const uint64x2_t vidx0123 = vreinterpretq_u64_s32(vshlq_n_s32(vandq_s32(vreinterpretq_s32_f32(vn01… in xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x24() 68 const int32x4_t ven0123 = vshlq_n_s32(vreinterpretq_s32_f32(vn0123), 19); in xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x24() 69 …const uint64x2_t vidx4567 = vreinterpretq_u64_s32(vshlq_n_s32(vandq_s32(vreinterpretq_s32_f32(vn45… in xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x24() 70 const int32x4_t ven4567 = vshlq_n_s32(vreinterpretq_s32_f32(vn4567), 19); in xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x24() 71 …const uint64x2_t vidx89AB = vreinterpretq_u64_s32(vshlq_n_s32(vandq_s32(vreinterpretq_s32_f32(vn89… in xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x24() 72 const int32x4_t ven89AB = vshlq_n_s32(vreinterpretq_s32_f32(vn89AB), 19); in xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x24() 73 …const uint64x2_t vidxCDEF = vreinterpretq_u64_s32(vshlq_n_s32(vandq_s32(vreinterpretq_s32_f32(vnCD… in xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x24() 74 const int32x4_t venCDEF = vshlq_n_s32(vreinterpretq_s32_f32(vnCDEF), 19); in xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x24() 75 …const uint64x2_t vidxGHIJ = vreinterpretq_u64_s32(vshlq_n_s32(vandq_s32(vreinterpretq_s32_f32(vnGH… in xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x24() 76 const int32x4_t venGHIJ = vshlq_n_s32(vreinterpretq_s32_f32(vnGHIJ), 19); in xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x24() [all …]
|
D | velu-neon-rr2-lut16-p3-x20.c | 64 …const uint64x2_t vidx0123 = vreinterpretq_u64_s32(vshlq_n_s32(vandq_s32(vreinterpretq_s32_f32(vn01… in xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x20() 65 const int32x4_t ven0123 = vshlq_n_s32(vreinterpretq_s32_f32(vn0123), 19); in xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x20() 66 …const uint64x2_t vidx4567 = vreinterpretq_u64_s32(vshlq_n_s32(vandq_s32(vreinterpretq_s32_f32(vn45… in xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x20() 67 const int32x4_t ven4567 = vshlq_n_s32(vreinterpretq_s32_f32(vn4567), 19); in xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x20() 68 …const uint64x2_t vidx89AB = vreinterpretq_u64_s32(vshlq_n_s32(vandq_s32(vreinterpretq_s32_f32(vn89… in xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x20() 69 const int32x4_t ven89AB = vshlq_n_s32(vreinterpretq_s32_f32(vn89AB), 19); in xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x20() 70 …const uint64x2_t vidxCDEF = vreinterpretq_u64_s32(vshlq_n_s32(vandq_s32(vreinterpretq_s32_f32(vnCD… in xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x20() 71 const int32x4_t venCDEF = vshlq_n_s32(vreinterpretq_s32_f32(vnCDEF), 19); in xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x20() 72 …const uint64x2_t vidxGHIJ = vreinterpretq_u64_s32(vshlq_n_s32(vandq_s32(vreinterpretq_s32_f32(vnGH… in xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x20() 73 const int32x4_t venGHIJ = vshlq_n_s32(vreinterpretq_s32_f32(vnGHIJ), 19); in xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x20() [all …]
|
D | velu-neonfma-rr1-lut16-p3-x20.c | 63 …const uint64x2_t vidx0123 = vreinterpretq_u64_s32(vshlq_n_s32(vandq_s32(vreinterpretq_s32_f32(vn01… in xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x20() 64 const int32x4_t ven0123 = vshlq_n_s32(vreinterpretq_s32_f32(vn0123), 19); in xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x20() 65 …const uint64x2_t vidx4567 = vreinterpretq_u64_s32(vshlq_n_s32(vandq_s32(vreinterpretq_s32_f32(vn45… in xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x20() 66 const int32x4_t ven4567 = vshlq_n_s32(vreinterpretq_s32_f32(vn4567), 19); in xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x20() 67 …const uint64x2_t vidx89AB = vreinterpretq_u64_s32(vshlq_n_s32(vandq_s32(vreinterpretq_s32_f32(vn89… in xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x20() 68 const int32x4_t ven89AB = vshlq_n_s32(vreinterpretq_s32_f32(vn89AB), 19); in xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x20() 69 …const uint64x2_t vidxCDEF = vreinterpretq_u64_s32(vshlq_n_s32(vandq_s32(vreinterpretq_s32_f32(vnCD… in xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x20() 70 const int32x4_t venCDEF = vshlq_n_s32(vreinterpretq_s32_f32(vnCDEF), 19); in xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x20() 71 …const uint64x2_t vidxGHIJ = vreinterpretq_u64_s32(vshlq_n_s32(vandq_s32(vreinterpretq_s32_f32(vnGH… in xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x20() 72 const int32x4_t venGHIJ = vshlq_n_s32(vreinterpretq_s32_f32(vnGHIJ), 19); in xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x20() [all …]
|
D | velu-neonfma-rr1-lut16-p3-x12.c | 57 …const uint64x2_t vidx0123 = vreinterpretq_u64_s32(vshlq_n_s32(vandq_s32(vreinterpretq_s32_f32(vn01… in xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x12() 58 const int32x4_t ven0123 = vshlq_n_s32(vreinterpretq_s32_f32(vn0123), 19); in xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x12() 59 …const uint64x2_t vidx4567 = vreinterpretq_u64_s32(vshlq_n_s32(vandq_s32(vreinterpretq_s32_f32(vn45… in xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x12() 60 const int32x4_t ven4567 = vshlq_n_s32(vreinterpretq_s32_f32(vn4567), 19); in xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x12() 61 …const uint64x2_t vidx89AB = vreinterpretq_u64_s32(vshlq_n_s32(vandq_s32(vreinterpretq_s32_f32(vn89… in xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x12() 62 const int32x4_t ven89AB = vshlq_n_s32(vreinterpretq_s32_f32(vn89AB), 19); in xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x12() 141 …const uint64x2_t vidx = vreinterpretq_u64_s32(vshlq_n_s32(vandq_s32(vreinterpretq_s32_f32(vn), vin… in xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x12() 142 const int32x4_t ven = vshlq_n_s32(vreinterpretq_s32_f32(vn), 19); in xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x12() 177 …const uint64x2_t vidx = vreinterpretq_u64_s32(vshlq_n_s32(vandq_s32(vreinterpretq_s32_f32(vn), vin… in xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x12() 178 const int32x4_t ven = vshlq_n_s32(vreinterpretq_s32_f32(vn), 19); in xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x12()
|
D | velu-neon-rr2-lut16-p3-x16.c | 61 …const uint64x2_t vidx0123 = vreinterpretq_u64_s32(vshlq_n_s32(vandq_s32(vreinterpretq_s32_f32(vn01… in xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x16() 62 const int32x4_t ven0123 = vshlq_n_s32(vreinterpretq_s32_f32(vn0123), 19); in xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x16() 63 …const uint64x2_t vidx4567 = vreinterpretq_u64_s32(vshlq_n_s32(vandq_s32(vreinterpretq_s32_f32(vn45… in xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x16() 64 const int32x4_t ven4567 = vshlq_n_s32(vreinterpretq_s32_f32(vn4567), 19); in xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x16() 65 …const uint64x2_t vidx89AB = vreinterpretq_u64_s32(vshlq_n_s32(vandq_s32(vreinterpretq_s32_f32(vn89… in xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x16() 66 const int32x4_t ven89AB = vshlq_n_s32(vreinterpretq_s32_f32(vn89AB), 19); in xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x16() 67 …const uint64x2_t vidxCDEF = vreinterpretq_u64_s32(vshlq_n_s32(vandq_s32(vreinterpretq_s32_f32(vnCD… in xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x16() 68 const int32x4_t venCDEF = vshlq_n_s32(vreinterpretq_s32_f32(vnCDEF), 19); in xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x16() 172 …const uint64x2_t vidx = vreinterpretq_u64_s32(vshlq_n_s32(vandq_s32(vreinterpretq_s32_f32(vn), vin… in xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x16() 173 const int32x4_t ven = vshlq_n_s32(vreinterpretq_s32_f32(vn), 19); in xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x16() [all …]
|
D | velu-neonfma-rr1-lut16-p3-x16.c | 60 …const uint64x2_t vidx0123 = vreinterpretq_u64_s32(vshlq_n_s32(vandq_s32(vreinterpretq_s32_f32(vn01… in xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x16() 61 const int32x4_t ven0123 = vshlq_n_s32(vreinterpretq_s32_f32(vn0123), 19); in xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x16() 62 …const uint64x2_t vidx4567 = vreinterpretq_u64_s32(vshlq_n_s32(vandq_s32(vreinterpretq_s32_f32(vn45… in xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x16() 63 const int32x4_t ven4567 = vshlq_n_s32(vreinterpretq_s32_f32(vn4567), 19); in xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x16() 64 …const uint64x2_t vidx89AB = vreinterpretq_u64_s32(vshlq_n_s32(vandq_s32(vreinterpretq_s32_f32(vn89… in xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x16() 65 const int32x4_t ven89AB = vshlq_n_s32(vreinterpretq_s32_f32(vn89AB), 19); in xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x16() 66 …const uint64x2_t vidxCDEF = vreinterpretq_u64_s32(vshlq_n_s32(vandq_s32(vreinterpretq_s32_f32(vnCD… in xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x16() 67 const int32x4_t venCDEF = vshlq_n_s32(vreinterpretq_s32_f32(vnCDEF), 19); in xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x16() 166 …const uint64x2_t vidx = vreinterpretq_u64_s32(vshlq_n_s32(vandq_s32(vreinterpretq_s32_f32(vn), vin… in xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x16() 167 const int32x4_t ven = vshlq_n_s32(vreinterpretq_s32_f32(vn), 19); in xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x16() [all …]
|
D | velu-neon-rr2-lut16-p3-x12.c | 58 …const uint64x2_t vidx0123 = vreinterpretq_u64_s32(vshlq_n_s32(vandq_s32(vreinterpretq_s32_f32(vn01… in xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x12() 59 const int32x4_t ven0123 = vshlq_n_s32(vreinterpretq_s32_f32(vn0123), 19); in xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x12() 60 …const uint64x2_t vidx4567 = vreinterpretq_u64_s32(vshlq_n_s32(vandq_s32(vreinterpretq_s32_f32(vn45… in xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x12() 61 const int32x4_t ven4567 = vshlq_n_s32(vreinterpretq_s32_f32(vn4567), 19); in xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x12() 62 …const uint64x2_t vidx89AB = vreinterpretq_u64_s32(vshlq_n_s32(vandq_s32(vreinterpretq_s32_f32(vn89… in xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x12() 63 const int32x4_t ven89AB = vshlq_n_s32(vreinterpretq_s32_f32(vn89AB), 19); in xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x12() 146 …const uint64x2_t vidx = vreinterpretq_u64_s32(vshlq_n_s32(vandq_s32(vreinterpretq_s32_f32(vn), vin… in xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x12() 147 const int32x4_t ven = vshlq_n_s32(vreinterpretq_s32_f32(vn), 19); in xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x12() 183 …const uint64x2_t vidx = vreinterpretq_u64_s32(vshlq_n_s32(vandq_s32(vreinterpretq_s32_f32(vn), vin… in xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x12() 184 const int32x4_t ven = vshlq_n_s32(vreinterpretq_s32_f32(vn), 19); in xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x12()
|
D | velu-neonfma-rr1-lut16-p3-x8.c | 54 …const uint64x2_t vidx0123 = vreinterpretq_u64_s32(vshlq_n_s32(vandq_s32(vreinterpretq_s32_f32(vn01… in xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x8() 55 const int32x4_t ven0123 = vshlq_n_s32(vreinterpretq_s32_f32(vn0123), 19); in xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x8() 56 …const uint64x2_t vidx4567 = vreinterpretq_u64_s32(vshlq_n_s32(vandq_s32(vreinterpretq_s32_f32(vn45… in xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x8() 57 const int32x4_t ven4567 = vshlq_n_s32(vreinterpretq_s32_f32(vn4567), 19); in xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x8() 116 …const uint64x2_t vidx = vreinterpretq_u64_s32(vshlq_n_s32(vandq_s32(vreinterpretq_s32_f32(vn), vin… in xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x8() 117 const int32x4_t ven = vshlq_n_s32(vreinterpretq_s32_f32(vn), 19); in xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x8() 152 …const uint64x2_t vidx = vreinterpretq_u64_s32(vshlq_n_s32(vandq_s32(vreinterpretq_s32_f32(vn), vin… in xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x8() 153 const int32x4_t ven = vshlq_n_s32(vreinterpretq_s32_f32(vn), 19); in xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x8()
|
D | velu-neon-rr2-lut16-p3-x8.c | 55 …const uint64x2_t vidx0123 = vreinterpretq_u64_s32(vshlq_n_s32(vandq_s32(vreinterpretq_s32_f32(vn01… in xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x8() 56 const int32x4_t ven0123 = vshlq_n_s32(vreinterpretq_s32_f32(vn0123), 19); in xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x8() 57 …const uint64x2_t vidx4567 = vreinterpretq_u64_s32(vshlq_n_s32(vandq_s32(vreinterpretq_s32_f32(vn45… in xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x8() 58 const int32x4_t ven4567 = vshlq_n_s32(vreinterpretq_s32_f32(vn4567), 19); in xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x8() 120 …const uint64x2_t vidx = vreinterpretq_u64_s32(vshlq_n_s32(vandq_s32(vreinterpretq_s32_f32(vn), vin… in xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x8() 121 const int32x4_t ven = vshlq_n_s32(vreinterpretq_s32_f32(vn), 19); in xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x8() 157 …const uint64x2_t vidx = vreinterpretq_u64_s32(vshlq_n_s32(vandq_s32(vreinterpretq_s32_f32(vn), vin… in xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x8() 158 const int32x4_t ven = vshlq_n_s32(vreinterpretq_s32_f32(vn), 19); in xnn_f32_velu_ukernel__neon_rr2_lut16_p3_x8()
|
D | velu-neon-rr2-p6-x24.c | 67 float32x4_t vs0123 = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn0123), 23)); in xnn_f32_velu_ukernel__neon_rr2_p6_x24() 69 float32x4_t vs4567 = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn4567), 23)); in xnn_f32_velu_ukernel__neon_rr2_p6_x24() 71 float32x4_t vs89AB = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn89AB), 23)); in xnn_f32_velu_ukernel__neon_rr2_p6_x24() 73 float32x4_t vsCDEF = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vnCDEF), 23)); in xnn_f32_velu_ukernel__neon_rr2_p6_x24() 75 float32x4_t vsGHIJ = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vnGHIJ), 23)); in xnn_f32_velu_ukernel__neon_rr2_p6_x24() 77 float32x4_t vsKLMN = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vnKLMN), 23)); in xnn_f32_velu_ukernel__neon_rr2_p6_x24() 189 float32x4_t vs = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn), 23)); in xnn_f32_velu_ukernel__neon_rr2_p6_x24() 218 float32x4_t vs = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn), 23)); in xnn_f32_velu_ukernel__neon_rr2_p6_x24()
|
D | velu-neonfma-rr1-p6-x24.c | 66 float32x4_t vs0123 = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn0123), 23)); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x24() 68 float32x4_t vs4567 = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn4567), 23)); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x24() 70 float32x4_t vs89AB = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn89AB), 23)); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x24() 72 float32x4_t vsCDEF = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vnCDEF), 23)); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x24() 74 float32x4_t vsGHIJ = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vnGHIJ), 23)); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x24() 76 float32x4_t vsKLMN = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vnKLMN), 23)); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x24() 181 float32x4_t vs = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn), 23)); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x24() 209 float32x4_t vs = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn), 23)); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x24()
|
D | velu-neonfma-rr1-p6-x20.c | 63 float32x4_t vs0123 = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn0123), 23)); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x20() 65 float32x4_t vs4567 = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn4567), 23)); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x20() 67 float32x4_t vs89AB = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn89AB), 23)); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x20() 69 float32x4_t vsCDEF = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vnCDEF), 23)); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x20() 71 float32x4_t vsGHIJ = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vnGHIJ), 23)); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x20() 162 float32x4_t vs = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn), 23)); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x20() 190 float32x4_t vs = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn), 23)); in xnn_f32_velu_ukernel__neonfma_rr1_p6_x20()
|
/external/XNNPACK/src/f32-raddstoreexpminusmax/gen/ |
D | neonfma-p5-x20.c | 72 … const float32x4_t vs0123 = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn0123), 23)); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x20() 73 … const float32x4_t vs4567 = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn4567), 23)); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x20() 74 … const float32x4_t vs89AB = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn89AB), 23)); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x20() 75 … const float32x4_t vsCDEF = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vnCDEF), 23)); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x20() 76 … const float32x4_t vsGHIJ = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vnGHIJ), 23)); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x20() 181 const float32x4_t vs = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn), 23)); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x20() 238 const float32x4_t vs = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn), 23)); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x20()
|
D | neon-p5-x20-acc5.c | 77 … const float32x4_t vs0123 = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn0123), 23)); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20_acc5() 78 … const float32x4_t vs4567 = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn4567), 23)); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20_acc5() 79 … const float32x4_t vs89AB = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn89AB), 23)); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20_acc5() 80 … const float32x4_t vsCDEF = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vnCDEF), 23)); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20_acc5() 81 … const float32x4_t vsGHIJ = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vnGHIJ), 23)); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20_acc5() 191 const float32x4_t vs = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn), 23)); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20_acc5() 248 const float32x4_t vs = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn), 23)); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20_acc5()
|
D | neonfma-p5-x20-acc5.c | 76 … const float32x4_t vs0123 = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn0123), 23)); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x20_acc5() 77 … const float32x4_t vs4567 = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn4567), 23)); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x20_acc5() 78 … const float32x4_t vs89AB = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn89AB), 23)); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x20_acc5() 79 … const float32x4_t vsCDEF = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vnCDEF), 23)); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x20_acc5() 80 … const float32x4_t vsGHIJ = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vnGHIJ), 23)); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x20_acc5() 190 const float32x4_t vs = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn), 23)); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x20_acc5() 247 const float32x4_t vs = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn), 23)); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x20_acc5()
|
D | neon-p5-x20.c | 73 … const float32x4_t vs0123 = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn0123), 23)); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20() 74 … const float32x4_t vs4567 = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn4567), 23)); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20() 75 … const float32x4_t vs89AB = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn89AB), 23)); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20() 76 … const float32x4_t vsCDEF = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vnCDEF), 23)); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20() 77 … const float32x4_t vsGHIJ = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vnGHIJ), 23)); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20() 182 const float32x4_t vs = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn), 23)); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20() 239 const float32x4_t vs = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn), 23)); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20()
|
D | neon-p5-x20-acc2.c | 74 … const float32x4_t vs0123 = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn0123), 23)); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20_acc2() 75 … const float32x4_t vs4567 = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn4567), 23)); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20_acc2() 76 … const float32x4_t vs89AB = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn89AB), 23)); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20_acc2() 77 … const float32x4_t vsCDEF = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vnCDEF), 23)); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20_acc2() 78 … const float32x4_t vsGHIJ = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vnGHIJ), 23)); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20_acc2() 185 const float32x4_t vs = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn), 23)); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20_acc2() 242 const float32x4_t vs = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn), 23)); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x20_acc2()
|
D | neonfma-p5-x20-acc2.c | 73 … const float32x4_t vs0123 = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn0123), 23)); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x20_acc2() 74 … const float32x4_t vs4567 = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn4567), 23)); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x20_acc2() 75 … const float32x4_t vs89AB = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn89AB), 23)); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x20_acc2() 76 … const float32x4_t vsCDEF = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vnCDEF), 23)); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x20_acc2() 77 … const float32x4_t vsGHIJ = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vnGHIJ), 23)); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x20_acc2() 184 const float32x4_t vs = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn), 23)); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x20_acc2() 241 const float32x4_t vs = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn), 23)); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x20_acc2()
|
D | neon-p5-x16.c | 70 … const float32x4_t vs0123 = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn0123), 23)); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16() 71 … const float32x4_t vs4567 = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn4567), 23)); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16() 72 … const float32x4_t vs89AB = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn89AB), 23)); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16() 73 … const float32x4_t vsCDEF = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vnCDEF), 23)); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16() 166 const float32x4_t vs = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn), 23)); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16() 223 const float32x4_t vs = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn), 23)); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16()
|
D | neonfma-p5-x16.c | 69 … const float32x4_t vs0123 = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn0123), 23)); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x16() 70 … const float32x4_t vs4567 = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn4567), 23)); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x16() 71 … const float32x4_t vs89AB = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn89AB), 23)); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x16() 72 … const float32x4_t vsCDEF = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vnCDEF), 23)); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x16() 165 const float32x4_t vs = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn), 23)); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x16() 222 const float32x4_t vs = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn), 23)); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x16()
|
D | neonfma-p5-x16-acc2.c | 70 … const float32x4_t vs0123 = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn0123), 23)); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x16_acc2() 71 … const float32x4_t vs4567 = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn4567), 23)); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x16_acc2() 72 … const float32x4_t vs89AB = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn89AB), 23)); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x16_acc2() 73 … const float32x4_t vsCDEF = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vnCDEF), 23)); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x16_acc2() 168 const float32x4_t vs = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn), 23)); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x16_acc2() 225 const float32x4_t vs = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn), 23)); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x16_acc2()
|
/external/XNNPACK/src/f32-sigmoid/gen/ |
D | neonfma-rr1-p5-div-x24.c | 59 … const float32x4_t vs0123 = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn0123), 23)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x24() 60 … const float32x4_t vs4567 = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn4567), 23)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x24() 61 … const float32x4_t vs89AB = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn89AB), 23)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x24() 62 … const float32x4_t vsCDEF = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vnCDEF), 23)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x24() 63 … const float32x4_t vsGHIJ = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vnGHIJ), 23)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x24() 64 … const float32x4_t vsKLMN = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vnKLMN), 23)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x24() 170 const float32x4_t vs = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn), 23)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x24() 196 const float32x4_t vs = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn), 23)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x24()
|
D | neonfma-rr1-p5-div-x20.c | 56 … const float32x4_t vs0123 = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn0123), 23)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x20() 57 … const float32x4_t vs4567 = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn4567), 23)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x20() 58 … const float32x4_t vs89AB = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn89AB), 23)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x20() 59 … const float32x4_t vsCDEF = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vnCDEF), 23)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x20() 60 … const float32x4_t vsGHIJ = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vnGHIJ), 23)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x20() 152 const float32x4_t vs = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn), 23)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x20() 178 const float32x4_t vs = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn), 23)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x20()
|
D | neonfma-rr1-p5-div-x16.c | 53 … const float32x4_t vs0123 = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn0123), 23)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x16() 54 … const float32x4_t vs4567 = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn4567), 23)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x16() 55 … const float32x4_t vs89AB = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn89AB), 23)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x16() 56 … const float32x4_t vsCDEF = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vnCDEF), 23)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x16() 134 const float32x4_t vs = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn), 23)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x16() 160 const float32x4_t vs = vreinterpretq_f32_s32(vshlq_n_s32(vreinterpretq_s32_f32(vn), 23)); in xnn_f32_sigmoid_ukernel__neonfma_rr1_p5_div_x16()
|