/external/XNNPACK/src/f32-raddstoreexpminusmax/gen/ |
D | scalar-lut64-p2-x4-acc2.c | in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x4_acc2():
     36  const float vminus_ln2_o64_lo = 0x1.BD0106p-19f;  (local)
    109  vt0 = vn0 * vminus_ln2_o64_lo + vt0;
    110  vt1 = vn1 * vminus_ln2_o64_lo + vt1;
    111  vt2 = vn2 * vminus_ln2_o64_lo + vt2;
    112  vt3 = vn3 * vminus_ln2_o64_lo + vt3;
    206  vt = vn * vminus_ln2_o64_lo + vt;
|
D | scalar-lut64-p2-x4.c | in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x4():
     36  const float vminus_ln2_o64_lo = 0x1.BD0106p-19f;  (local)
    108  vt0 = vn0 * vminus_ln2_o64_lo + vt0;
    109  vt1 = vn1 * vminus_ln2_o64_lo + vt1;
    110  vt2 = vn2 * vminus_ln2_o64_lo + vt2;
    111  vt3 = vn3 * vminus_ln2_o64_lo + vt3;
    203  vt = vn * vminus_ln2_o64_lo + vt;
|
D | scalar-lut64-p2-x4-acc4.c | in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x4_acc4():
     36  const float vminus_ln2_o64_lo = 0x1.BD0106p-19f;  (local)
    111  vt0 = vn0 * vminus_ln2_o64_lo + vt0;
    112  vt1 = vn1 * vminus_ln2_o64_lo + vt1;
    113  vt2 = vn2 * vminus_ln2_o64_lo + vt2;
    114  vt3 = vn3 * vminus_ln2_o64_lo + vt3;
    210  vt = vn * vminus_ln2_o64_lo + vt;
|
D | scalar-lut64-p2-x2-acc2.c | in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x2_acc2():
     36  const float vminus_ln2_o64_lo = 0x1.BD0106p-19f;  (local)
     93  vt0 = vn0 * vminus_ln2_o64_lo + vt0;
     94  vt1 = vn1 * vminus_ln2_o64_lo + vt1;
    172  vt = vn * vminus_ln2_o64_lo + vt;
|
D | scalar-lut64-p2-x2.c | in xnn_f32_raddstoreexpminusmax_ukernel__scalar_lut64_p2_x2():
     36  const float vminus_ln2_o64_lo = 0x1.BD0106p-19f;  (local)
     92  vt0 = vn0 * vminus_ln2_o64_lo + vt0;
     93  vt1 = vn1 * vminus_ln2_o64_lo + vt1;
    169  vt = vn * vminus_ln2_o64_lo + vt;
|
D | neonfma-lut64-p2-x20.c | in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x20():
     34  const float32x4_t vminus_ln2_o64_lo = vmovq_n_f32(0x1.05c61p-35f);  (local)
    153  vt0123 = vfmaq_f32(vt0123, vn0123, vminus_ln2_o64_lo);
    154  vt4567 = vfmaq_f32(vt4567, vn4567, vminus_ln2_o64_lo);
    155  vt89AB = vfmaq_f32(vt89AB, vn89AB, vminus_ln2_o64_lo);
    156  vtCDEF = vfmaq_f32(vtCDEF, vnCDEF, vminus_ln2_o64_lo);
    157  vtGHIJ = vfmaq_f32(vtGHIJ, vnGHIJ, vminus_ln2_o64_lo);
    253  vt = vfmaq_f32(vt, vn, vminus_ln2_o64_lo);
    329  vt = vfmaq_f32(vt, vn, vminus_ln2_o64_lo);
|
D | neonfma-lut64-p2-x20-acc5.c | in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x20_acc5():
     34  const float32x4_t vminus_ln2_o64_lo = vmovq_n_f32(0x1.05c61p-35f);  (local)
    157  vt0123 = vfmaq_f32(vt0123, vn0123, vminus_ln2_o64_lo);
    158  vt4567 = vfmaq_f32(vt4567, vn4567, vminus_ln2_o64_lo);
    159  vt89AB = vfmaq_f32(vt89AB, vn89AB, vminus_ln2_o64_lo);
    160  vtCDEF = vfmaq_f32(vtCDEF, vnCDEF, vminus_ln2_o64_lo);
    161  vtGHIJ = vfmaq_f32(vtGHIJ, vnGHIJ, vminus_ln2_o64_lo);
    262  vt = vfmaq_f32(vt, vn, vminus_ln2_o64_lo);
    338  vt = vfmaq_f32(vt, vn, vminus_ln2_o64_lo);
|
D | neonfma-lut64-p2-x20-acc2.c | in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x20_acc2():
     34  const float32x4_t vminus_ln2_o64_lo = vmovq_n_f32(0x1.05c61p-35f);  (local)
    154  vt0123 = vfmaq_f32(vt0123, vn0123, vminus_ln2_o64_lo);
    155  vt4567 = vfmaq_f32(vt4567, vn4567, vminus_ln2_o64_lo);
    156  vt89AB = vfmaq_f32(vt89AB, vn89AB, vminus_ln2_o64_lo);
    157  vtCDEF = vfmaq_f32(vtCDEF, vnCDEF, vminus_ln2_o64_lo);
    158  vtGHIJ = vfmaq_f32(vtGHIJ, vnGHIJ, vminus_ln2_o64_lo);
    256  vt = vfmaq_f32(vt, vn, vminus_ln2_o64_lo);
    332  vt = vfmaq_f32(vt, vn, vminus_ln2_o64_lo);
|
D | neon-lut64-p2-x20-acc2.c | in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x20_acc2():
     35  const float32x4_t vminus_ln2_o64_lo = vmovq_n_f32(0x1.BD0106p-19f);  (local)
    155  vt0123 = vmlaq_f32(vt0123, vn0123, vminus_ln2_o64_lo);
    156  vt4567 = vmlaq_f32(vt4567, vn4567, vminus_ln2_o64_lo);
    157  vt89AB = vmlaq_f32(vt89AB, vn89AB, vminus_ln2_o64_lo);
    158  vtCDEF = vmlaq_f32(vtCDEF, vnCDEF, vminus_ln2_o64_lo);
    159  vtGHIJ = vmlaq_f32(vtGHIJ, vnGHIJ, vminus_ln2_o64_lo);
    257  vt = vmlaq_f32(vt, vn, vminus_ln2_o64_lo);
    333  vt = vmlaq_f32(vt, vn, vminus_ln2_o64_lo);
|
D | neon-lut64-p2-x20.c | in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x20():
     35  const float32x4_t vminus_ln2_o64_lo = vmovq_n_f32(0x1.BD0106p-19f);  (local)
    154  vt0123 = vmlaq_f32(vt0123, vn0123, vminus_ln2_o64_lo);
    155  vt4567 = vmlaq_f32(vt4567, vn4567, vminus_ln2_o64_lo);
    156  vt89AB = vmlaq_f32(vt89AB, vn89AB, vminus_ln2_o64_lo);
    157  vtCDEF = vmlaq_f32(vtCDEF, vnCDEF, vminus_ln2_o64_lo);
    158  vtGHIJ = vmlaq_f32(vtGHIJ, vnGHIJ, vminus_ln2_o64_lo);
    254  vt = vmlaq_f32(vt, vn, vminus_ln2_o64_lo);
    330  vt = vmlaq_f32(vt, vn, vminus_ln2_o64_lo);
|
D | neon-lut64-p2-x20-acc5.c | in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x20_acc5():
     35  const float32x4_t vminus_ln2_o64_lo = vmovq_n_f32(0x1.BD0106p-19f);  (local)
    158  vt0123 = vmlaq_f32(vt0123, vn0123, vminus_ln2_o64_lo);
    159  vt4567 = vmlaq_f32(vt4567, vn4567, vminus_ln2_o64_lo);
    160  vt89AB = vmlaq_f32(vt89AB, vn89AB, vminus_ln2_o64_lo);
    161  vtCDEF = vmlaq_f32(vtCDEF, vnCDEF, vminus_ln2_o64_lo);
    162  vtGHIJ = vmlaq_f32(vtGHIJ, vnGHIJ, vminus_ln2_o64_lo);
    263  vt = vmlaq_f32(vt, vn, vminus_ln2_o64_lo);
    339  vt = vmlaq_f32(vt, vn, vminus_ln2_o64_lo);
|
D | neonfma-lut64-p2-x16-acc4.c | in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x16_acc4():
     34  const float32x4_t vminus_ln2_o64_lo = vmovq_n_f32(0x1.05c61p-35f);  (local)
    141  vt0123 = vfmaq_f32(vt0123, vn0123, vminus_ln2_o64_lo);
    142  vt4567 = vfmaq_f32(vt4567, vn4567, vminus_ln2_o64_lo);
    143  vt89AB = vfmaq_f32(vt89AB, vn89AB, vminus_ln2_o64_lo);
    144  vtCDEF = vfmaq_f32(vtCDEF, vnCDEF, vminus_ln2_o64_lo);
    238  vt = vfmaq_f32(vt, vn, vminus_ln2_o64_lo);
    314  vt = vfmaq_f32(vt, vn, vminus_ln2_o64_lo);
|
D | neon-lut64-p2-x16-acc2.c | in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x16_acc2():
     35  const float32x4_t vminus_ln2_o64_lo = vmovq_n_f32(0x1.BD0106p-19f);  (local)
    140  vt0123 = vmlaq_f32(vt0123, vn0123, vminus_ln2_o64_lo);
    141  vt4567 = vmlaq_f32(vt4567, vn4567, vminus_ln2_o64_lo);
    142  vt89AB = vmlaq_f32(vt89AB, vn89AB, vminus_ln2_o64_lo);
    143  vtCDEF = vmlaq_f32(vtCDEF, vnCDEF, vminus_ln2_o64_lo);
    235  vt = vmlaq_f32(vt, vn, vminus_ln2_o64_lo);
    311  vt = vmlaq_f32(vt, vn, vminus_ln2_o64_lo);
|
D | neon-lut64-p2-x16.c | in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x16():
     35  const float32x4_t vminus_ln2_o64_lo = vmovq_n_f32(0x1.BD0106p-19f);  (local)
    139  vt0123 = vmlaq_f32(vt0123, vn0123, vminus_ln2_o64_lo);
    140  vt4567 = vmlaq_f32(vt4567, vn4567, vminus_ln2_o64_lo);
    141  vt89AB = vmlaq_f32(vt89AB, vn89AB, vminus_ln2_o64_lo);
    142  vtCDEF = vmlaq_f32(vtCDEF, vnCDEF, vminus_ln2_o64_lo);
    232  vt = vmlaq_f32(vt, vn, vminus_ln2_o64_lo);
    308  vt = vmlaq_f32(vt, vn, vminus_ln2_o64_lo);
|
D | neonfma-lut64-p2-x16-acc2.c | in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x16_acc2():
     34  const float32x4_t vminus_ln2_o64_lo = vmovq_n_f32(0x1.05c61p-35f);  (local)
    139  vt0123 = vfmaq_f32(vt0123, vn0123, vminus_ln2_o64_lo);
    140  vt4567 = vfmaq_f32(vt4567, vn4567, vminus_ln2_o64_lo);
    141  vt89AB = vfmaq_f32(vt89AB, vn89AB, vminus_ln2_o64_lo);
    142  vtCDEF = vfmaq_f32(vtCDEF, vnCDEF, vminus_ln2_o64_lo);
    234  vt = vfmaq_f32(vt, vn, vminus_ln2_o64_lo);
    310  vt = vfmaq_f32(vt, vn, vminus_ln2_o64_lo);
|
D | neon-lut64-p2-x16-acc4.c | in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x16_acc4():
     35  const float32x4_t vminus_ln2_o64_lo = vmovq_n_f32(0x1.BD0106p-19f);  (local)
    142  vt0123 = vmlaq_f32(vt0123, vn0123, vminus_ln2_o64_lo);
    143  vt4567 = vmlaq_f32(vt4567, vn4567, vminus_ln2_o64_lo);
    144  vt89AB = vmlaq_f32(vt89AB, vn89AB, vminus_ln2_o64_lo);
    145  vtCDEF = vmlaq_f32(vtCDEF, vnCDEF, vminus_ln2_o64_lo);
    239  vt = vmlaq_f32(vt, vn, vminus_ln2_o64_lo);
    315  vt = vmlaq_f32(vt, vn, vminus_ln2_o64_lo);
|
D | neonfma-lut64-p2-x16.c | in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x16():
     34  const float32x4_t vminus_ln2_o64_lo = vmovq_n_f32(0x1.05c61p-35f);  (local)
    138  vt0123 = vfmaq_f32(vt0123, vn0123, vminus_ln2_o64_lo);
    139  vt4567 = vfmaq_f32(vt4567, vn4567, vminus_ln2_o64_lo);
    140  vt89AB = vfmaq_f32(vt89AB, vn89AB, vminus_ln2_o64_lo);
    141  vtCDEF = vfmaq_f32(vtCDEF, vnCDEF, vminus_ln2_o64_lo);
    231  vt = vfmaq_f32(vt, vn, vminus_ln2_o64_lo);
    307  vt = vfmaq_f32(vt, vn, vminus_ln2_o64_lo);
|
D | neonfma-lut64-p2-x12-acc3.c | in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x12_acc3():
     34  const float32x4_t vminus_ln2_o64_lo = vmovq_n_f32(0x1.05c61p-35f);  (local)
    125  vt0123 = vfmaq_f32(vt0123, vn0123, vminus_ln2_o64_lo);
    126  vt4567 = vfmaq_f32(vt4567, vn4567, vminus_ln2_o64_lo);
    127  vt89AB = vfmaq_f32(vt89AB, vn89AB, vminus_ln2_o64_lo);
    214  vt = vfmaq_f32(vt, vn, vminus_ln2_o64_lo);
    290  vt = vfmaq_f32(vt, vn, vminus_ln2_o64_lo);
|
D | neon-lut64-p2-x12-acc3.c | in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x12_acc3():
     35  const float32x4_t vminus_ln2_o64_lo = vmovq_n_f32(0x1.BD0106p-19f);  (local)
    126  vt0123 = vmlaq_f32(vt0123, vn0123, vminus_ln2_o64_lo);
    127  vt4567 = vmlaq_f32(vt4567, vn4567, vminus_ln2_o64_lo);
    128  vt89AB = vmlaq_f32(vt89AB, vn89AB, vminus_ln2_o64_lo);
    215  vt = vmlaq_f32(vt, vn, vminus_ln2_o64_lo);
    291  vt = vmlaq_f32(vt, vn, vminus_ln2_o64_lo);
|
D | neon-lut64-p2-x12-acc2.c | in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x12_acc2():
     35  const float32x4_t vminus_ln2_o64_lo = vmovq_n_f32(0x1.BD0106p-19f);  (local)
    125  vt0123 = vmlaq_f32(vt0123, vn0123, vminus_ln2_o64_lo);
    126  vt4567 = vmlaq_f32(vt4567, vn4567, vminus_ln2_o64_lo);
    127  vt89AB = vmlaq_f32(vt89AB, vn89AB, vminus_ln2_o64_lo);
    213  vt = vmlaq_f32(vt, vn, vminus_ln2_o64_lo);
    289  vt = vmlaq_f32(vt, vn, vminus_ln2_o64_lo);
|
D | neon-lut64-p2-x12.c | in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x12():
     35  const float32x4_t vminus_ln2_o64_lo = vmovq_n_f32(0x1.BD0106p-19f);  (local)
    124  vt0123 = vmlaq_f32(vt0123, vn0123, vminus_ln2_o64_lo);
    125  vt4567 = vmlaq_f32(vt4567, vn4567, vminus_ln2_o64_lo);
    126  vt89AB = vmlaq_f32(vt89AB, vn89AB, vminus_ln2_o64_lo);
    210  vt = vmlaq_f32(vt, vn, vminus_ln2_o64_lo);
    286  vt = vmlaq_f32(vt, vn, vminus_ln2_o64_lo);
|
D | neonfma-lut64-p2-x12-acc2.c | in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x12_acc2():
     34  const float32x4_t vminus_ln2_o64_lo = vmovq_n_f32(0x1.05c61p-35f);  (local)
    124  vt0123 = vfmaq_f32(vt0123, vn0123, vminus_ln2_o64_lo);
    125  vt4567 = vfmaq_f32(vt4567, vn4567, vminus_ln2_o64_lo);
    126  vt89AB = vfmaq_f32(vt89AB, vn89AB, vminus_ln2_o64_lo);
    212  vt = vfmaq_f32(vt, vn, vminus_ln2_o64_lo);
    288  vt = vfmaq_f32(vt, vn, vminus_ln2_o64_lo);
|
D | neonfma-lut64-p2-x12.c | in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x12():
     34  const float32x4_t vminus_ln2_o64_lo = vmovq_n_f32(0x1.05c61p-35f);  (local)
    123  vt0123 = vfmaq_f32(vt0123, vn0123, vminus_ln2_o64_lo);
    124  vt4567 = vfmaq_f32(vt4567, vn4567, vminus_ln2_o64_lo);
    125  vt89AB = vfmaq_f32(vt89AB, vn89AB, vminus_ln2_o64_lo);
    209  vt = vfmaq_f32(vt, vn, vminus_ln2_o64_lo);
    285  vt = vfmaq_f32(vt, vn, vminus_ln2_o64_lo);
|
D | neon-lut64-p2-x8-acc2.c | in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x8_acc2():
     35  const float32x4_t vminus_ln2_o64_lo = vmovq_n_f32(0x1.BD0106p-19f);  (local)
    110  vt0123 = vmlaq_f32(vt0123, vn0123, vminus_ln2_o64_lo);
    111  vt4567 = vmlaq_f32(vt4567, vn4567, vminus_ln2_o64_lo);
    191  vt = vmlaq_f32(vt, vn, vminus_ln2_o64_lo);
    267  vt = vmlaq_f32(vt, vn, vminus_ln2_o64_lo);
|
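Note: in the listing above, the non-FMA kernels (neon-lut64-p2-*) apply this correction with vmlaq_f32 and a relatively wide low-order constant (0x1.BD0106p-19f), while the FMA kernels (neonfma-lut64-p2-*) use vfmaq_f32 and a much smaller one (0x1.05c61p-35f), since a fused multiply-add does not round the product separately and only the tiny residual of ln(2)/64 remains to be folded in. The sketch below is a minimal illustration of the two forms of this step, not code copied from the kernels; the helper names are made up here, only the intrinsics and constants come from the entries above.

#include <arm_neon.h>

/* Non-FMA path: vmlaq_f32(acc, a, b) computes acc + a * b with a separately
 * rounded multiply, so the kernels carry the wider low-order term. */
static inline float32x4_t correct_lo_neon(float32x4_t vt, float32x4_t vn) {
  const float32x4_t vminus_ln2_o64_lo = vmovq_n_f32(0x1.BD0106p-19f);
  return vmlaq_f32(vt, vn, vminus_ln2_o64_lo);
}

/* FMA path: vfmaq_f32(acc, a, b) fuses the multiply and add with a single
 * rounding, so a much smaller residual term suffices. */
static inline float32x4_t correct_lo_neonfma(float32x4_t vt, float32x4_t vn) {
  const float32x4_t vminus_ln2_o64_lo = vmovq_n_f32(0x1.05c61p-35f);
  return vfmaq_f32(vt, vn, vminus_ln2_o64_lo);
}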
/external/XNNPACK/src/math/ |
D | expminus-scalar-lut64-p2.c | in xnn_math_f32_expminus__scalar_lut64_p2():
     48  const float vminus_ln2_o64_lo = 0x1.BD0106p-19f;  (local)
     89  vt = vn * vminus_ln2_o64_lo + vt;
|
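The scalar kernels and the reference implementation above apply the same low-order correction with a plain multiply-add: vt = vn * vminus_ln2_o64_lo + vt. Below is a minimal sketch of the two-step, Cody-Waite-style reduction t = x - n * ln(2)/64 that this step belongs to. Only the low constant appears in this listing; the high constant and the function name here are assumptions of the sketch, not code copied from XNNPACK.

/* Sketch only: reduce x by n * ln(2)/64 in two steps so that the rounding
 * error of the coarse step is compensated by the low-order term. */
static float reduce_minus_n_ln2_o64(float vx, float vn) {
  const float vminus_ln2_o64_hi = -0x1.630000p-7f;   /* assumed high part of -ln(2)/64 */
  const float vminus_ln2_o64_lo =  0x1.BD0106p-19f;  /* low part, as in the listing above */

  float vt = vn * vminus_ln2_o64_hi + vx;  /* coarse reduction */
  vt = vn * vminus_ln2_o64_lo + vt;        /* fold in the residual of ln(2)/64 */
  return vt;
}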