/external/XNNPACK/src/f16-raddstoreexpminusmax/gen/ |
D | neonfp16arith-rr2-p2-x96.c |
    159  const uint16x8_t vm0 = vcltq_f16(vx0, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x96()
    161  const uint16x8_t vm1 = vcltq_f16(vx1, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x96()
    163  const uint16x8_t vm2 = vcltq_f16(vx2, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x96()
    165  const uint16x8_t vm3 = vcltq_f16(vx3, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x96()
    167  const uint16x8_t vm4 = vcltq_f16(vx4, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x96()
    169  const uint16x8_t vm5 = vcltq_f16(vx5, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x96()
    171  const uint16x8_t vm6 = vcltq_f16(vx6, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x96()
    173  const uint16x8_t vm7 = vcltq_f16(vx7, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x96()
    175  const uint16x8_t vm8 = vcltq_f16(vx8, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x96()
    177  const uint16x8_t vm9 = vcltq_f16(vx9, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x96()
    [all …]
|
D | neonfp16arith-rr2-p2-x96-acc3.c |
    161  const uint16x8_t vm0 = vcltq_f16(vx0, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x96_acc3()
    163  const uint16x8_t vm1 = vcltq_f16(vx1, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x96_acc3()
    165  const uint16x8_t vm2 = vcltq_f16(vx2, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x96_acc3()
    167  const uint16x8_t vm3 = vcltq_f16(vx3, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x96_acc3()
    169  const uint16x8_t vm4 = vcltq_f16(vx4, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x96_acc3()
    171  const uint16x8_t vm5 = vcltq_f16(vx5, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x96_acc3()
    173  const uint16x8_t vm6 = vcltq_f16(vx6, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x96_acc3()
    175  const uint16x8_t vm7 = vcltq_f16(vx7, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x96_acc3()
    177  const uint16x8_t vm8 = vcltq_f16(vx8, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x96_acc3()
    179  const uint16x8_t vm9 = vcltq_f16(vx9, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x96_acc3()
    [all …]
|
D | neonfp16arith-rr2-p2-x96-acc2.c |
    160  const uint16x8_t vm0 = vcltq_f16(vx0, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x96_acc2()
    162  const uint16x8_t vm1 = vcltq_f16(vx1, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x96_acc2()
    164  const uint16x8_t vm2 = vcltq_f16(vx2, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x96_acc2()
    166  const uint16x8_t vm3 = vcltq_f16(vx3, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x96_acc2()
    168  const uint16x8_t vm4 = vcltq_f16(vx4, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x96_acc2()
    170  const uint16x8_t vm5 = vcltq_f16(vx5, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x96_acc2()
    172  const uint16x8_t vm6 = vcltq_f16(vx6, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x96_acc2()
    174  const uint16x8_t vm7 = vcltq_f16(vx7, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x96_acc2()
    176  const uint16x8_t vm8 = vcltq_f16(vx8, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x96_acc2()
    178  const uint16x8_t vm9 = vcltq_f16(vx9, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x96_acc2()
    [all …]
|
D | neonfp16arith-rr2-p2-x96-acc6.c |
    164  const uint16x8_t vm0 = vcltq_f16(vx0, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x96_acc6()
    166  const uint16x8_t vm1 = vcltq_f16(vx1, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x96_acc6()
    168  const uint16x8_t vm2 = vcltq_f16(vx2, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x96_acc6()
    170  const uint16x8_t vm3 = vcltq_f16(vx3, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x96_acc6()
    172  const uint16x8_t vm4 = vcltq_f16(vx4, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x96_acc6()
    174  const uint16x8_t vm5 = vcltq_f16(vx5, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x96_acc6()
    176  const uint16x8_t vm6 = vcltq_f16(vx6, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x96_acc6()
    178  const uint16x8_t vm7 = vcltq_f16(vx7, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x96_acc6()
    180  const uint16x8_t vm8 = vcltq_f16(vx8, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x96_acc6()
    182  const uint16x8_t vm9 = vcltq_f16(vx9, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x96_acc6()
    [all …]
|
D | neonfp16arith-rr2-p2-x80.c |
    141  const uint16x8_t vm0 = vcltq_f16(vx0, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x80()
    143  const uint16x8_t vm1 = vcltq_f16(vx1, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x80()
    145  const uint16x8_t vm2 = vcltq_f16(vx2, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x80()
    147  const uint16x8_t vm3 = vcltq_f16(vx3, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x80()
    149  const uint16x8_t vm4 = vcltq_f16(vx4, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x80()
    151  const uint16x8_t vm5 = vcltq_f16(vx5, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x80()
    153  const uint16x8_t vm6 = vcltq_f16(vx6, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x80()
    155  const uint16x8_t vm7 = vcltq_f16(vx7, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x80()
    157  const uint16x8_t vm8 = vcltq_f16(vx8, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x80()
    159  const uint16x8_t vm9 = vcltq_f16(vx9, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x80()
    [all …]
|
D | neonfp16arith-rr2-p2-x80-acc5.c |
    145  const uint16x8_t vm0 = vcltq_f16(vx0, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x80_acc5()
    147  const uint16x8_t vm1 = vcltq_f16(vx1, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x80_acc5()
    149  const uint16x8_t vm2 = vcltq_f16(vx2, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x80_acc5()
    151  const uint16x8_t vm3 = vcltq_f16(vx3, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x80_acc5()
    153  const uint16x8_t vm4 = vcltq_f16(vx4, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x80_acc5()
    155  const uint16x8_t vm5 = vcltq_f16(vx5, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x80_acc5()
    157  const uint16x8_t vm6 = vcltq_f16(vx6, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x80_acc5()
    159  const uint16x8_t vm7 = vcltq_f16(vx7, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x80_acc5()
    161  const uint16x8_t vm8 = vcltq_f16(vx8, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x80_acc5()
    163  const uint16x8_t vm9 = vcltq_f16(vx9, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x80_acc5()
    [all …]
|
D | neonfp16arith-rr2-p2-x80-acc2.c |
    142  const uint16x8_t vm0 = vcltq_f16(vx0, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x80_acc2()
    144  const uint16x8_t vm1 = vcltq_f16(vx1, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x80_acc2()
    146  const uint16x8_t vm2 = vcltq_f16(vx2, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x80_acc2()
    148  const uint16x8_t vm3 = vcltq_f16(vx3, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x80_acc2()
    150  const uint16x8_t vm4 = vcltq_f16(vx4, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x80_acc2()
    152  const uint16x8_t vm5 = vcltq_f16(vx5, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x80_acc2()
    154  const uint16x8_t vm6 = vcltq_f16(vx6, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x80_acc2()
    156  const uint16x8_t vm7 = vcltq_f16(vx7, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x80_acc2()
    158  const uint16x8_t vm8 = vcltq_f16(vx8, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x80_acc2()
    160  const uint16x8_t vm9 = vcltq_f16(vx9, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x80_acc2()
    [all …]
|
D | neonfp16arith-rr2-p2-x72.c |
    132  const uint16x8_t vm0 = vcltq_f16(vx0, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x72()
    134  const uint16x8_t vm1 = vcltq_f16(vx1, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x72()
    136  const uint16x8_t vm2 = vcltq_f16(vx2, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x72()
    138  const uint16x8_t vm3 = vcltq_f16(vx3, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x72()
    140  const uint16x8_t vm4 = vcltq_f16(vx4, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x72()
    142  const uint16x8_t vm5 = vcltq_f16(vx5, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x72()
    144  const uint16x8_t vm6 = vcltq_f16(vx6, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x72()
    146  const uint16x8_t vm7 = vcltq_f16(vx7, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x72()
    148  const uint16x8_t vm8 = vcltq_f16(vx8, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x72()
    198  const uint16x8_t vm = vcltq_f16(vx, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x72()
    [all …]
|
D | neonfp16arith-rr2-p2-x72-acc3.c |
    134  const uint16x8_t vm0 = vcltq_f16(vx0, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x72_acc3()
    136  const uint16x8_t vm1 = vcltq_f16(vx1, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x72_acc3()
    138  const uint16x8_t vm2 = vcltq_f16(vx2, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x72_acc3()
    140  const uint16x8_t vm3 = vcltq_f16(vx3, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x72_acc3()
    142  const uint16x8_t vm4 = vcltq_f16(vx4, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x72_acc3()
    144  const uint16x8_t vm5 = vcltq_f16(vx5, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x72_acc3()
    146  const uint16x8_t vm6 = vcltq_f16(vx6, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x72_acc3()
    148  const uint16x8_t vm7 = vcltq_f16(vx7, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x72_acc3()
    150  const uint16x8_t vm8 = vcltq_f16(vx8, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x72_acc3()
    202  const uint16x8_t vm = vcltq_f16(vx, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x72_acc3()
    [all …]
|
D | neonfp16arith-rr2-p2-x64-acc4.c |
    126  const uint16x8_t vm0 = vcltq_f16(vx0, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x64_acc4()
    128  const uint16x8_t vm1 = vcltq_f16(vx1, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x64_acc4()
    130  const uint16x8_t vm2 = vcltq_f16(vx2, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x64_acc4()
    132  const uint16x8_t vm3 = vcltq_f16(vx3, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x64_acc4()
    134  const uint16x8_t vm4 = vcltq_f16(vx4, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x64_acc4()
    136  const uint16x8_t vm5 = vcltq_f16(vx5, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x64_acc4()
    138  const uint16x8_t vm6 = vcltq_f16(vx6, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x64_acc4()
    140  const uint16x8_t vm7 = vcltq_f16(vx7, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x64_acc4()
    190  const uint16x8_t vm = vcltq_f16(vx, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x64_acc4()
    216  const uint16x8_t vm = vcltq_f16(vx, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x64_acc4()
|
D | neonfp16arith-rr2-p2-x64-acc2.c |
    124  const uint16x8_t vm0 = vcltq_f16(vx0, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x64_acc2()
    126  const uint16x8_t vm1 = vcltq_f16(vx1, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x64_acc2()
    128  const uint16x8_t vm2 = vcltq_f16(vx2, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x64_acc2()
    130  const uint16x8_t vm3 = vcltq_f16(vx3, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x64_acc2()
    132  const uint16x8_t vm4 = vcltq_f16(vx4, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x64_acc2()
    134  const uint16x8_t vm5 = vcltq_f16(vx5, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x64_acc2()
    136  const uint16x8_t vm6 = vcltq_f16(vx6, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x64_acc2()
    138  const uint16x8_t vm7 = vcltq_f16(vx7, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x64_acc2()
    186  const uint16x8_t vm = vcltq_f16(vx, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x64_acc2()
    212  const uint16x8_t vm = vcltq_f16(vx, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x64_acc2()
|
D | neonfp16arith-rr2-p2-x64.c |
    123  const uint16x8_t vm0 = vcltq_f16(vx0, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x64()
    125  const uint16x8_t vm1 = vcltq_f16(vx1, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x64()
    127  const uint16x8_t vm2 = vcltq_f16(vx2, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x64()
    129  const uint16x8_t vm3 = vcltq_f16(vx3, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x64()
    131  const uint16x8_t vm4 = vcltq_f16(vx4, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x64()
    133  const uint16x8_t vm5 = vcltq_f16(vx5, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x64()
    135  const uint16x8_t vm6 = vcltq_f16(vx6, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x64()
    137  const uint16x8_t vm7 = vcltq_f16(vx7, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x64()
    184  const uint16x8_t vm = vcltq_f16(vx, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x64()
    210  const uint16x8_t vm = vcltq_f16(vx, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x64()
|
D | neonfp16arith-rr2-p2-x48.c |
    105  const uint16x8_t vm0 = vcltq_f16(vx0, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x48()
    107  const uint16x8_t vm1 = vcltq_f16(vx1, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x48()
    109  const uint16x8_t vm2 = vcltq_f16(vx2, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x48()
    111  const uint16x8_t vm3 = vcltq_f16(vx3, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x48()
    113  const uint16x8_t vm4 = vcltq_f16(vx4, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x48()
    115  const uint16x8_t vm5 = vcltq_f16(vx5, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x48()
    156  const uint16x8_t vm = vcltq_f16(vx, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x48()
    182  const uint16x8_t vm = vcltq_f16(vx, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x48()
|
D | neonfp16arith-rr2-p2-x40.c |
    96   const uint16x8_t vm0 = vcltq_f16(vx0, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x40()
    98   const uint16x8_t vm1 = vcltq_f16(vx1, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x40()
    100  const uint16x8_t vm2 = vcltq_f16(vx2, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x40()
    102  const uint16x8_t vm3 = vcltq_f16(vx3, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x40()
    104  const uint16x8_t vm4 = vcltq_f16(vx4, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x40()
    142  const uint16x8_t vm = vcltq_f16(vx, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x40()
    168  const uint16x8_t vm = vcltq_f16(vx, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x40()
|
D | neonfp16arith-rr2-p2-x40-acc2.c |
    97   const uint16x8_t vm0 = vcltq_f16(vx0, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x40_acc2()
    99   const uint16x8_t vm1 = vcltq_f16(vx1, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x40_acc2()
    101  const uint16x8_t vm2 = vcltq_f16(vx2, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x40_acc2()
    103  const uint16x8_t vm3 = vcltq_f16(vx3, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x40_acc2()
    105  const uint16x8_t vm4 = vcltq_f16(vx4, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x40_acc2()
    144  const uint16x8_t vm = vcltq_f16(vx, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x40_acc2()
    170  const uint16x8_t vm = vcltq_f16(vx, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x40_acc2()
|
D | neonfp16arith-rr2-p2-x48-acc3.c |
    107  const uint16x8_t vm0 = vcltq_f16(vx0, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x48_acc3()
    109  const uint16x8_t vm1 = vcltq_f16(vx1, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x48_acc3()
    111  const uint16x8_t vm2 = vcltq_f16(vx2, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x48_acc3()
    113  const uint16x8_t vm3 = vcltq_f16(vx3, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x48_acc3()
    115  const uint16x8_t vm4 = vcltq_f16(vx4, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x48_acc3()
    117  const uint16x8_t vm5 = vcltq_f16(vx5, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x48_acc3()
    160  const uint16x8_t vm = vcltq_f16(vx, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x48_acc3()
    186  const uint16x8_t vm = vcltq_f16(vx, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x48_acc3()
|
D | neonfp16arith-rr2-p2-x48-acc2.c |
    106  const uint16x8_t vm0 = vcltq_f16(vx0, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x48_acc2()
    108  const uint16x8_t vm1 = vcltq_f16(vx1, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x48_acc2()
    110  const uint16x8_t vm2 = vcltq_f16(vx2, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x48_acc2()
    112  const uint16x8_t vm3 = vcltq_f16(vx3, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x48_acc2()
    114  const uint16x8_t vm4 = vcltq_f16(vx4, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x48_acc2()
    116  const uint16x8_t vm5 = vcltq_f16(vx5, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x48_acc2()
    158  const uint16x8_t vm = vcltq_f16(vx, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x48_acc2()
    184  const uint16x8_t vm = vcltq_f16(vx, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x48_acc2()
|
D | neonfp16arith-rr2-p2-x40-acc5.c |
    100  const uint16x8_t vm0 = vcltq_f16(vx0, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x40_acc5()
    102  const uint16x8_t vm1 = vcltq_f16(vx1, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x40_acc5()
    104  const uint16x8_t vm2 = vcltq_f16(vx2, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x40_acc5()
    106  const uint16x8_t vm3 = vcltq_f16(vx3, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x40_acc5()
    108  const uint16x8_t vm4 = vcltq_f16(vx4, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x40_acc5()
    150  const uint16x8_t vm = vcltq_f16(vx, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x40_acc5()
    176  const uint16x8_t vm = vcltq_f16(vx, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x40_acc5()
|
D | neonfp16arith-rr2-p2-x32.c |
    87   const uint16x8_t vm0 = vcltq_f16(vx0, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x32()
    89   const uint16x8_t vm1 = vcltq_f16(vx1, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x32()
    91   const uint16x8_t vm2 = vcltq_f16(vx2, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x32()
    93   const uint16x8_t vm3 = vcltq_f16(vx3, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x32()
    128  const uint16x8_t vm = vcltq_f16(vx, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x32()
    154  const uint16x8_t vm = vcltq_f16(vx, vdenorm_cutoff);   in xnn_f16_raddstoreexpminusmax_ukernel__neonfp16arith_rr2_p2_x32()
|
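Every `f16-raddstoreexpminusmax` match above builds a per-lane mask flagging inputs below `vdenorm_cutoff`, i.e. lanes where `exp(x - max)` would underflow into the half-precision denormal range. Below is a minimal sketch of how such a mask is typically consumed, assuming the common XNNPACK pattern of bit-clearing the flagged lanes; the helper name `flush_denorm_to_zero` is illustrative and not taken from these files.

    #include <arm_neon.h>   /* needs ARMv8.2-A FP16: compile with -march=armv8.2-a+fp16 */

    /* Hypothetical helper: vf holds exp(vx) for vx = x - max; zero out the lanes
     * whose input lies below the denormal cutoff so underflow becomes an exact 0. */
    static float16x8_t flush_denorm_to_zero(float16x8_t vf, float16x8_t vx,
                                            float16x8_t vdenorm_cutoff) {
      /* All-ones in lanes where vx < vdenorm_cutoff -- the comparison seen in every match above. */
      const uint16x8_t vm = vcltq_f16(vx, vdenorm_cutoff);
      /* Bit-clear the masked lanes of vf; the remaining lanes pass through untouched. */
      return vreinterpretq_f16_u16(vbicq_u16(vreinterpretq_u16_f16(vf), vm));
    }

Presumably the point is that lanes whose exponential underflows then contribute an exact zero to the running sum and the stored outputs.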
/external/XNNPACK/src/f16-vsigmoid/gen/ |
D | vsigmoid-neonfp16arith-rr2-p2-div-x64.c |
    155  const uint16x8_t vm0 = vcltq_f16(vx0, vmovq_n_f16(0.0f));   in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_div_x64()
    156  const uint16x8_t vm1 = vcltq_f16(vx1, vmovq_n_f16(0.0f));   in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_div_x64()
    157  const uint16x8_t vm2 = vcltq_f16(vx2, vmovq_n_f16(0.0f));   in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_div_x64()
    158  const uint16x8_t vm3 = vcltq_f16(vx3, vmovq_n_f16(0.0f));   in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_div_x64()
    159  const uint16x8_t vm4 = vcltq_f16(vx4, vmovq_n_f16(0.0f));   in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_div_x64()
    160  const uint16x8_t vm5 = vcltq_f16(vx5, vmovq_n_f16(0.0f));   in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_div_x64()
    161  const uint16x8_t vm6 = vcltq_f16(vx6, vmovq_n_f16(0.0f));   in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_div_x64()
    162  const uint16x8_t vm7 = vcltq_f16(vx7, vmovq_n_f16(0.0f));   in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_div_x64()
    201  const uint16x8_t vm = vcltq_f16(vx, vmovq_n_f16(0.0f));   in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_div_x64()
    225  const uint16x8_t vm = vcltq_f16(vx, vmovq_n_f16(0.0f));   in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_div_x64()
|
D | vsigmoid-neonfp16arith-rr2-p2-div-x56.c |
    142  const uint16x8_t vm0 = vcltq_f16(vx0, vmovq_n_f16(0.0f));   in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_div_x56()
    143  const uint16x8_t vm1 = vcltq_f16(vx1, vmovq_n_f16(0.0f));   in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_div_x56()
    144  const uint16x8_t vm2 = vcltq_f16(vx2, vmovq_n_f16(0.0f));   in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_div_x56()
    145  const uint16x8_t vm3 = vcltq_f16(vx3, vmovq_n_f16(0.0f));   in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_div_x56()
    146  const uint16x8_t vm4 = vcltq_f16(vx4, vmovq_n_f16(0.0f));   in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_div_x56()
    147  const uint16x8_t vm5 = vcltq_f16(vx5, vmovq_n_f16(0.0f));   in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_div_x56()
    148  const uint16x8_t vm6 = vcltq_f16(vx6, vmovq_n_f16(0.0f));   in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_div_x56()
    185  const uint16x8_t vm = vcltq_f16(vx, vmovq_n_f16(0.0f));   in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_div_x56()
    209  const uint16x8_t vm = vcltq_f16(vx, vmovq_n_f16(0.0f));   in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_div_x56()
|
D | vsigmoid-neonfp16arith-rr2-p2-nr1fma-x64.c |
    182  const uint16x8_t vm0 = vcltq_f16(vx0, vmovq_n_f16(0.0f));   in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_nr1fma_x64()
    183  const uint16x8_t vm1 = vcltq_f16(vx1, vmovq_n_f16(0.0f));   in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_nr1fma_x64()
    184  const uint16x8_t vm2 = vcltq_f16(vx2, vmovq_n_f16(0.0f));   in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_nr1fma_x64()
    185  const uint16x8_t vm3 = vcltq_f16(vx3, vmovq_n_f16(0.0f));   in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_nr1fma_x64()
    186  const uint16x8_t vm4 = vcltq_f16(vx4, vmovq_n_f16(0.0f));   in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_nr1fma_x64()
    187  const uint16x8_t vm5 = vcltq_f16(vx5, vmovq_n_f16(0.0f));   in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_nr1fma_x64()
    188  const uint16x8_t vm6 = vcltq_f16(vx6, vmovq_n_f16(0.0f));   in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_nr1fma_x64()
    189  const uint16x8_t vm7 = vcltq_f16(vx7, vmovq_n_f16(0.0f));   in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_nr1fma_x64()
    232  const uint16x8_t vm = vcltq_f16(vx, vmovq_n_f16(0.0f));   in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_nr1fma_x64()
    260  const uint16x8_t vm = vcltq_f16(vx, vmovq_n_f16(0.0f));   in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_nr1fma_x64()
|
D | vsigmoid-neonfp16arith-rr2-p2-nr1recps-x64.c |
    182  const uint16x8_t vm0 = vcltq_f16(vx0, vmovq_n_f16(0.0f));   in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_nr1recps_x64()
    183  const uint16x8_t vm1 = vcltq_f16(vx1, vmovq_n_f16(0.0f));   in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_nr1recps_x64()
    184  const uint16x8_t vm2 = vcltq_f16(vx2, vmovq_n_f16(0.0f));   in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_nr1recps_x64()
    185  const uint16x8_t vm3 = vcltq_f16(vx3, vmovq_n_f16(0.0f));   in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_nr1recps_x64()
    186  const uint16x8_t vm4 = vcltq_f16(vx4, vmovq_n_f16(0.0f));   in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_nr1recps_x64()
    187  const uint16x8_t vm5 = vcltq_f16(vx5, vmovq_n_f16(0.0f));   in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_nr1recps_x64()
    188  const uint16x8_t vm6 = vcltq_f16(vx6, vmovq_n_f16(0.0f));   in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_nr1recps_x64()
    189  const uint16x8_t vm7 = vcltq_f16(vx7, vmovq_n_f16(0.0f));   in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_nr1recps_x64()
    232  const uint16x8_t vm = vcltq_f16(vx, vmovq_n_f16(0.0f));   in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_nr1recps_x64()
    260  const uint16x8_t vm = vcltq_f16(vx, vmovq_n_f16(0.0f));   in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_nr1recps_x64()
|
D | vsigmoid-neonfp16arith-rr2-p2-div-x48.c |
    129  const uint16x8_t vm0 = vcltq_f16(vx0, vmovq_n_f16(0.0f));   in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_div_x48()
    130  const uint16x8_t vm1 = vcltq_f16(vx1, vmovq_n_f16(0.0f));   in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_div_x48()
    131  const uint16x8_t vm2 = vcltq_f16(vx2, vmovq_n_f16(0.0f));   in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_div_x48()
    132  const uint16x8_t vm3 = vcltq_f16(vx3, vmovq_n_f16(0.0f));   in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_div_x48()
    133  const uint16x8_t vm4 = vcltq_f16(vx4, vmovq_n_f16(0.0f));   in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_div_x48()
    134  const uint16x8_t vm5 = vcltq_f16(vx5, vmovq_n_f16(0.0f));   in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_div_x48()
    169  const uint16x8_t vm = vcltq_f16(vx, vmovq_n_f16(0.0f));   in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_div_x48()
    193  const uint16x8_t vm = vcltq_f16(vx, vmovq_n_f16(0.0f));   in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_div_x48()
|
D | vsigmoid-neonfp16arith-rr2-p2-div-x40.c |
    116  const uint16x8_t vm0 = vcltq_f16(vx0, vmovq_n_f16(0.0f));   in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_div_x40()
    117  const uint16x8_t vm1 = vcltq_f16(vx1, vmovq_n_f16(0.0f));   in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_div_x40()
    118  const uint16x8_t vm2 = vcltq_f16(vx2, vmovq_n_f16(0.0f));   in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_div_x40()
    119  const uint16x8_t vm3 = vcltq_f16(vx3, vmovq_n_f16(0.0f));   in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_div_x40()
    120  const uint16x8_t vm4 = vcltq_f16(vx4, vmovq_n_f16(0.0f));   in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_div_x40()
    153  const uint16x8_t vm = vcltq_f16(vx, vmovq_n_f16(0.0f));   in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_div_x40()
    177  const uint16x8_t vm = vcltq_f16(vx, vmovq_n_f16(0.0f));   in xnn_f16_vsigmoid_ukernel__neonfp16arith_rr2_p2_div_x40()
|
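In the `f16-vsigmoid` matches the mask instead records the sign of the input (`vx < 0`). A hedged sketch of the branch selection these kernels usually perform follows, assuming `vf` holds sigmoid(-|x|) from the preceding polynomial and division (or Newton-Raphson) step; the helper name is illustrative, not the verbatim kernel code.

    #include <arm_neon.h>   /* needs ARMv8.2-A FP16: compile with -march=armv8.2-a+fp16 */

    /* Hypothetical helper: vf holds sigmoid(-|x|) computed for the decaying branch;
     * pick the correct result per lane from the sign of vx. */
    static float16x8_t select_sigmoid_branch(float16x8_t vf, float16x8_t vx) {
      /* All-ones in lanes where vx < 0 -- the comparison seen in every match above. */
      const uint16x8_t vm = vcltq_f16(vx, vmovq_n_f16(0.0f));
      /* Negative lanes keep vf; non-negative lanes use 1 - vf. */
      return vbslq_f16(vm, vf, vsubq_f16(vmovq_n_f16(1.0f), vf));
    }

Selecting `1 - vf` for the non-negative lanes applies the identity sigmoid(x) = 1 - sigmoid(-x), so only the decaying exponential ever has to be evaluated.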