/external/XNNPACK/src/f32-gavgpool-cw/ |
D | neon-x4.c | 78 const float32x4_t vsum01 = vcombine_f32(vadd_f32(vget_low_f32(vsum0), vget_high_f32(vsum0)), in xnn_f32_gavgpool_cw_ukernel__neon_x4() 79 vadd_f32(vget_low_f32(vsum1), vget_high_f32(vsum1))); in xnn_f32_gavgpool_cw_ukernel__neon_x4() 80 const float32x4_t vsum23 = vcombine_f32(vadd_f32(vget_low_f32(vsum2), vget_high_f32(vsum2)), in xnn_f32_gavgpool_cw_ukernel__neon_x4() 81 vadd_f32(vget_low_f32(vsum3), vget_high_f32(vsum3))); in xnn_f32_gavgpool_cw_ukernel__neon_x4() 114 float32x2_t vsum = vadd_f32(vget_low_f32(vsum0), vget_high_f32(vsum0)); in xnn_f32_gavgpool_cw_ukernel__neon_x4()
|
/external/XNNPACK/src/f32-raddstoreexpminusmax/gen/ |
D | neonfma-p5-x4.c | 96 float32x2_t vacc_lo = vadd_f32(vget_high_f32(vacc), vget_low_f32(vacc)); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x4() 153 vacc_lo = vadd_f32(vacc_lo, vf_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x4() 166 … vacc_lo = vadd_f32(vacc_lo, vreinterpret_f32_u64(vshl_n_u64(vreinterpret_u64_f32(vf_lo), 32))); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x4()
|
D | neon-p5-x4.c | 97 float32x2_t vacc_lo = vadd_f32(vget_high_f32(vacc), vget_low_f32(vacc)); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x4() 154 vacc_lo = vadd_f32(vacc_lo, vf_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x4() 167 … vacc_lo = vadd_f32(vacc_lo, vreinterpret_f32_u64(vshl_n_u64(vreinterpret_u64_f32(vf_lo), 32))); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x4()
|
D | neonfma-lut64-p2-x4.c | 115 float32x2_t vacc_lo = vadd_f32(vget_high_f32(vacc), vget_low_f32(vacc)); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x4() 191 vacc_lo = vadd_f32(vacc_lo, vf_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x4() 204 … vacc_lo = vadd_f32(vacc_lo, vreinterpret_f32_u64(vshl_n_u64(vreinterpret_u64_f32(vf_lo), 32))); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x4()
|
D | neon-lut64-p2-x4.c | 116 float32x2_t vacc_lo = vadd_f32(vget_high_f32(vacc), vget_low_f32(vacc)); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x4() 192 vacc_lo = vadd_f32(vacc_lo, vf_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x4() 205 … vacc_lo = vadd_f32(vacc_lo, vreinterpret_f32_u64(vshl_n_u64(vreinterpret_u64_f32(vf_lo), 32))); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x4()
|
D | neonfma-p5-x8-acc2.c | 172 float32x2_t vacc_lo = vadd_f32(vget_high_f32(vacc), vget_low_f32(vacc)); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x8_acc2() 229 vacc_lo = vadd_f32(vacc_lo, vf_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x8_acc2() 242 … vacc_lo = vadd_f32(vacc_lo, vreinterpret_f32_u64(vshl_n_u64(vreinterpret_u64_f32(vf_lo), 32))); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x8_acc2()
|
D | neon-p5-x8.c | 170 float32x2_t vacc_lo = vadd_f32(vget_high_f32(vacc), vget_low_f32(vacc)); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8() 227 vacc_lo = vadd_f32(vacc_lo, vf_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8() 240 … vacc_lo = vadd_f32(vacc_lo, vreinterpret_f32_u64(vshl_n_u64(vreinterpret_u64_f32(vf_lo), 32))); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8()
|
D | neon-p5-x8-acc2.c | 173 float32x2_t vacc_lo = vadd_f32(vget_high_f32(vacc), vget_low_f32(vacc)); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8_acc2() 230 vacc_lo = vadd_f32(vacc_lo, vf_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8_acc2() 243 … vacc_lo = vadd_f32(vacc_lo, vreinterpret_f32_u64(vshl_n_u64(vreinterpret_u64_f32(vf_lo), 32))); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x8_acc2()
|
D | neonfma-p5-x8.c | 169 float32x2_t vacc_lo = vadd_f32(vget_high_f32(vacc), vget_low_f32(vacc)); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x8() 226 vacc_lo = vadd_f32(vacc_lo, vf_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x8() 239 … vacc_lo = vadd_f32(vacc_lo, vreinterpret_f32_u64(vshl_n_u64(vreinterpret_u64_f32(vf_lo), 32))); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x8()
|
D | neonfma-p5-x12.c | 185 float32x2_t vacc_lo = vadd_f32(vget_high_f32(vacc), vget_low_f32(vacc)); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x12() 242 vacc_lo = vadd_f32(vacc_lo, vf_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x12() 255 … vacc_lo = vadd_f32(vacc_lo, vreinterpret_f32_u64(vshl_n_u64(vreinterpret_u64_f32(vf_lo), 32))); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x12()
|
D | neon-p5-x12-acc2.c | 189 float32x2_t vacc_lo = vadd_f32(vget_high_f32(vacc), vget_low_f32(vacc)); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12_acc2() 246 vacc_lo = vadd_f32(vacc_lo, vf_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12_acc2() 259 … vacc_lo = vadd_f32(vacc_lo, vreinterpret_f32_u64(vshl_n_u64(vreinterpret_u64_f32(vf_lo), 32))); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12_acc2()
|
D | neonfma-p5-x12-acc2.c | 188 float32x2_t vacc_lo = vadd_f32(vget_high_f32(vacc), vget_low_f32(vacc)); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x12_acc2() 245 vacc_lo = vadd_f32(vacc_lo, vf_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x12_acc2() 258 … vacc_lo = vadd_f32(vacc_lo, vreinterpret_f32_u64(vshl_n_u64(vreinterpret_u64_f32(vf_lo), 32))); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x12_acc2()
|
D | neon-p5-x12.c | 186 float32x2_t vacc_lo = vadd_f32(vget_high_f32(vacc), vget_low_f32(vacc)); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12() 243 vacc_lo = vadd_f32(vacc_lo, vf_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12() 256 … vacc_lo = vadd_f32(vacc_lo, vreinterpret_f32_u64(vshl_n_u64(vreinterpret_u64_f32(vf_lo), 32))); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12()
|
D | neon-lut64-p2-x8.c | 214 float32x2_t vacc_lo = vadd_f32(vget_high_f32(vacc), vget_low_f32(vacc)); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x8() 290 vacc_lo = vadd_f32(vacc_lo, vf_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x8() 303 … vacc_lo = vadd_f32(vacc_lo, vreinterpret_f32_u64(vshl_n_u64(vreinterpret_u64_f32(vf_lo), 32))); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x8()
|
D | neonfma-lut64-p2-x8-acc2.c | 216 float32x2_t vacc_lo = vadd_f32(vget_high_f32(vacc), vget_low_f32(vacc)); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x8_acc2() 292 vacc_lo = vadd_f32(vacc_lo, vf_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x8_acc2() 305 … vacc_lo = vadd_f32(vacc_lo, vreinterpret_f32_u64(vshl_n_u64(vreinterpret_u64_f32(vf_lo), 32))); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x8_acc2()
|
D | neon-p5-x12-acc3.c | 191 float32x2_t vacc_lo = vadd_f32(vget_high_f32(vacc), vget_low_f32(vacc)); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12_acc3() 248 vacc_lo = vadd_f32(vacc_lo, vf_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12_acc3() 261 … vacc_lo = vadd_f32(vacc_lo, vreinterpret_f32_u64(vshl_n_u64(vreinterpret_u64_f32(vf_lo), 32))); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x12_acc3()
|
D | neon-lut64-p2-x8-acc2.c | 217 float32x2_t vacc_lo = vadd_f32(vget_high_f32(vacc), vget_low_f32(vacc)); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x8_acc2() 293 vacc_lo = vadd_f32(vacc_lo, vf_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x8_acc2() 306 … vacc_lo = vadd_f32(vacc_lo, vreinterpret_f32_u64(vshl_n_u64(vreinterpret_u64_f32(vf_lo), 32))); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x8_acc2()
|
D | neonfma-p5-x12-acc3.c | 190 float32x2_t vacc_lo = vadd_f32(vget_high_f32(vacc), vget_low_f32(vacc)); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x12_acc3() 247 vacc_lo = vadd_f32(vacc_lo, vf_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x12_acc3() 260 … vacc_lo = vadd_f32(vacc_lo, vreinterpret_f32_u64(vshl_n_u64(vreinterpret_u64_f32(vf_lo), 32))); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x12_acc3()
|
D | neonfma-lut64-p2-x8.c | 213 float32x2_t vacc_lo = vadd_f32(vget_high_f32(vacc), vget_low_f32(vacc)); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x8() 289 vacc_lo = vadd_f32(vacc_lo, vf_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x8() 302 … vacc_lo = vadd_f32(vacc_lo, vreinterpret_f32_u64(vshl_n_u64(vreinterpret_u64_f32(vf_lo), 32))); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x8()
|
D | neon-p5-x16.c | 202 float32x2_t vacc_lo = vadd_f32(vget_high_f32(vacc), vget_low_f32(vacc)); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16() 259 vacc_lo = vadd_f32(vacc_lo, vf_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16() 272 … vacc_lo = vadd_f32(vacc_lo, vreinterpret_f32_u64(vshl_n_u64(vreinterpret_u64_f32(vf_lo), 32))); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16()
|
D | neonfma-p5-x16.c | 201 float32x2_t vacc_lo = vadd_f32(vget_high_f32(vacc), vget_low_f32(vacc)); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x16() 258 vacc_lo = vadd_f32(vacc_lo, vf_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x16() 271 … vacc_lo = vadd_f32(vacc_lo, vreinterpret_f32_u64(vshl_n_u64(vreinterpret_u64_f32(vf_lo), 32))); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x16()
|
D | neonfma-p5-x16-acc2.c | 204 float32x2_t vacc_lo = vadd_f32(vget_high_f32(vacc), vget_low_f32(vacc)); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x16_acc2() 261 vacc_lo = vadd_f32(vacc_lo, vf_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x16_acc2() 274 … vacc_lo = vadd_f32(vacc_lo, vreinterpret_f32_u64(vshl_n_u64(vreinterpret_u64_f32(vf_lo), 32))); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x16_acc2()
|
D | neon-p5-x16-acc2.c | 205 float32x2_t vacc_lo = vadd_f32(vget_high_f32(vacc), vget_low_f32(vacc)); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16_acc2() 262 vacc_lo = vadd_f32(vacc_lo, vf_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16_acc2() 275 … vacc_lo = vadd_f32(vacc_lo, vreinterpret_f32_u64(vshl_n_u64(vreinterpret_u64_f32(vf_lo), 32))); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16_acc2()
|
D | neonfma-p5-x16-acc4.c | 208 float32x2_t vacc_lo = vadd_f32(vget_high_f32(vacc), vget_low_f32(vacc)); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x16_acc4() 265 vacc_lo = vadd_f32(vacc_lo, vf_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x16_acc4() 278 … vacc_lo = vadd_f32(vacc_lo, vreinterpret_f32_u64(vshl_n_u64(vreinterpret_u64_f32(vf_lo), 32))); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_p5_x16_acc4()
|
D | neon-p5-x16-acc4.c | 209 float32x2_t vacc_lo = vadd_f32(vget_high_f32(vacc), vget_low_f32(vacc)); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16_acc4() 266 vacc_lo = vadd_f32(vacc_lo, vf_lo); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16_acc4() 279 … vacc_lo = vadd_f32(vacc_lo, vreinterpret_f32_u64(vshl_n_u64(vreinterpret_u64_f32(vf_lo), 32))); in xnn_f32_raddstoreexpminusmax_ukernel__neon_p5_x16_acc4()
|