/external/XNNPACK/src/f32-spmm/gen/ |
D | 4x4-neonfma.c | 132 vout01c0 = vmax_f32(vout01c0, vget_low_f32(vmin)); in xnn_f32_spmm_ukernel_4x4__neonfma() 133 vout01c1 = vmax_f32(vout01c1, vget_low_f32(vmin)); in xnn_f32_spmm_ukernel_4x4__neonfma() 134 vout01c2 = vmax_f32(vout01c2, vget_low_f32(vmin)); in xnn_f32_spmm_ukernel_4x4__neonfma() 135 vout01c3 = vmax_f32(vout01c3, vget_low_f32(vmin)); in xnn_f32_spmm_ukernel_4x4__neonfma() 160 vout01 = vmax_f32(vout01, vget_low_f32(vmin)); in xnn_f32_spmm_ukernel_4x4__neonfma() 200 vout0c0 = vmax_f32(vout0c0, vget_low_f32(vmin)); in xnn_f32_spmm_ukernel_4x4__neonfma() 201 vout0c1 = vmax_f32(vout0c1, vget_low_f32(vmin)); in xnn_f32_spmm_ukernel_4x4__neonfma() 202 vout0c2 = vmax_f32(vout0c2, vget_low_f32(vmin)); in xnn_f32_spmm_ukernel_4x4__neonfma() 203 vout0c3 = vmax_f32(vout0c3, vget_low_f32(vmin)); in xnn_f32_spmm_ukernel_4x4__neonfma() 228 vout0 = vmax_f32(vout0, vget_low_f32(vmin)); in xnn_f32_spmm_ukernel_4x4__neonfma()
|
D | 8x4-neonfma.c | 228 vout01c0 = vmax_f32(vout01c0, vget_low_f32(vmin)); in xnn_f32_spmm_ukernel_8x4__neonfma() 229 vout01c1 = vmax_f32(vout01c1, vget_low_f32(vmin)); in xnn_f32_spmm_ukernel_8x4__neonfma() 230 vout01c2 = vmax_f32(vout01c2, vget_low_f32(vmin)); in xnn_f32_spmm_ukernel_8x4__neonfma() 231 vout01c3 = vmax_f32(vout01c3, vget_low_f32(vmin)); in xnn_f32_spmm_ukernel_8x4__neonfma() 256 vout01 = vmax_f32(vout01, vget_low_f32(vmin)); in xnn_f32_spmm_ukernel_8x4__neonfma() 296 vout0c0 = vmax_f32(vout0c0, vget_low_f32(vmin)); in xnn_f32_spmm_ukernel_8x4__neonfma() 297 vout0c1 = vmax_f32(vout0c1, vget_low_f32(vmin)); in xnn_f32_spmm_ukernel_8x4__neonfma() 298 vout0c2 = vmax_f32(vout0c2, vget_low_f32(vmin)); in xnn_f32_spmm_ukernel_8x4__neonfma() 299 vout0c3 = vmax_f32(vout0c3, vget_low_f32(vmin)); in xnn_f32_spmm_ukernel_8x4__neonfma() 324 vout0 = vmax_f32(vout0, vget_low_f32(vmin)); in xnn_f32_spmm_ukernel_8x4__neonfma()
|
D | 4x2-neonfma.c | 116 vout01c0 = vmax_f32(vout01c0, vget_low_f32(vmin)); in xnn_f32_spmm_ukernel_4x2__neonfma() 117 vout01c1 = vmax_f32(vout01c1, vget_low_f32(vmin)); in xnn_f32_spmm_ukernel_4x2__neonfma() 140 vout01 = vmax_f32(vout01, vget_low_f32(vmin)); in xnn_f32_spmm_ukernel_4x2__neonfma() 174 vout0c0 = vmax_f32(vout0c0, vget_low_f32(vmin)); in xnn_f32_spmm_ukernel_4x2__neonfma() 175 vout0c1 = vmax_f32(vout0c1, vget_low_f32(vmin)); in xnn_f32_spmm_ukernel_4x2__neonfma() 198 vout0 = vmax_f32(vout0, vget_low_f32(vmin)); in xnn_f32_spmm_ukernel_4x2__neonfma()
|
D | 8x2-neonfma.c | 192 vout01c0 = vmax_f32(vout01c0, vget_low_f32(vmin)); in xnn_f32_spmm_ukernel_8x2__neonfma() 193 vout01c1 = vmax_f32(vout01c1, vget_low_f32(vmin)); in xnn_f32_spmm_ukernel_8x2__neonfma() 216 vout01 = vmax_f32(vout01, vget_low_f32(vmin)); in xnn_f32_spmm_ukernel_8x2__neonfma() 250 vout0c0 = vmax_f32(vout0c0, vget_low_f32(vmin)); in xnn_f32_spmm_ukernel_8x2__neonfma() 251 vout0c1 = vmax_f32(vout0c1, vget_low_f32(vmin)); in xnn_f32_spmm_ukernel_8x2__neonfma() 274 vout0 = vmax_f32(vout0, vget_low_f32(vmin)); in xnn_f32_spmm_ukernel_8x2__neonfma()
|
D | 12x4-neonfma.c | 351 vout01c0 = vmax_f32(vout01c0, vget_low_f32(vmin)); in xnn_f32_spmm_ukernel_12x4__neonfma() 352 vout01c1 = vmax_f32(vout01c1, vget_low_f32(vmin)); in xnn_f32_spmm_ukernel_12x4__neonfma() 353 vout01c2 = vmax_f32(vout01c2, vget_low_f32(vmin)); in xnn_f32_spmm_ukernel_12x4__neonfma() 354 vout01c3 = vmax_f32(vout01c3, vget_low_f32(vmin)); in xnn_f32_spmm_ukernel_12x4__neonfma() 379 vout01 = vmax_f32(vout01, vget_low_f32(vmin)); in xnn_f32_spmm_ukernel_12x4__neonfma() 419 vout0c0 = vmax_f32(vout0c0, vget_low_f32(vmin)); in xnn_f32_spmm_ukernel_12x4__neonfma() 420 vout0c1 = vmax_f32(vout0c1, vget_low_f32(vmin)); in xnn_f32_spmm_ukernel_12x4__neonfma() 421 vout0c2 = vmax_f32(vout0c2, vget_low_f32(vmin)); in xnn_f32_spmm_ukernel_12x4__neonfma() 422 vout0c3 = vmax_f32(vout0c3, vget_low_f32(vmin)); in xnn_f32_spmm_ukernel_12x4__neonfma() 447 vout0 = vmax_f32(vout0, vget_low_f32(vmin)); in xnn_f32_spmm_ukernel_12x4__neonfma()
|
D | 16x4-neonfma.c | 379 vout01c0 = vmax_f32(vout01c0, vget_low_f32(vmin)); in xnn_f32_spmm_ukernel_16x4__neonfma() 380 vout01c1 = vmax_f32(vout01c1, vget_low_f32(vmin)); in xnn_f32_spmm_ukernel_16x4__neonfma() 381 vout01c2 = vmax_f32(vout01c2, vget_low_f32(vmin)); in xnn_f32_spmm_ukernel_16x4__neonfma() 382 vout01c3 = vmax_f32(vout01c3, vget_low_f32(vmin)); in xnn_f32_spmm_ukernel_16x4__neonfma() 407 vout01 = vmax_f32(vout01, vget_low_f32(vmin)); in xnn_f32_spmm_ukernel_16x4__neonfma() 447 vout0c0 = vmax_f32(vout0c0, vget_low_f32(vmin)); in xnn_f32_spmm_ukernel_16x4__neonfma() 448 vout0c1 = vmax_f32(vout0c1, vget_low_f32(vmin)); in xnn_f32_spmm_ukernel_16x4__neonfma() 449 vout0c2 = vmax_f32(vout0c2, vget_low_f32(vmin)); in xnn_f32_spmm_ukernel_16x4__neonfma() 450 vout0c3 = vmax_f32(vout0c3, vget_low_f32(vmin)); in xnn_f32_spmm_ukernel_16x4__neonfma() 475 vout0 = vmax_f32(vout0, vget_low_f32(vmin)); in xnn_f32_spmm_ukernel_16x4__neonfma()
|
D | 12x2-neonfma.c | 285 vout01c0 = vmax_f32(vout01c0, vget_low_f32(vmin)); in xnn_f32_spmm_ukernel_12x2__neonfma() 286 vout01c1 = vmax_f32(vout01c1, vget_low_f32(vmin)); in xnn_f32_spmm_ukernel_12x2__neonfma() 309 vout01 = vmax_f32(vout01, vget_low_f32(vmin)); in xnn_f32_spmm_ukernel_12x2__neonfma() 343 vout0c0 = vmax_f32(vout0c0, vget_low_f32(vmin)); in xnn_f32_spmm_ukernel_12x2__neonfma() 344 vout0c1 = vmax_f32(vout0c1, vget_low_f32(vmin)); in xnn_f32_spmm_ukernel_12x2__neonfma() 367 vout0 = vmax_f32(vout0, vget_low_f32(vmin)); in xnn_f32_spmm_ukernel_12x2__neonfma()
|
D | 16x2-neonfma.c | 303 vout01c0 = vmax_f32(vout01c0, vget_low_f32(vmin)); in xnn_f32_spmm_ukernel_16x2__neonfma() 304 vout01c1 = vmax_f32(vout01c1, vget_low_f32(vmin)); in xnn_f32_spmm_ukernel_16x2__neonfma() 327 vout01 = vmax_f32(vout01, vget_low_f32(vmin)); in xnn_f32_spmm_ukernel_16x2__neonfma() 361 vout0c0 = vmax_f32(vout0c0, vget_low_f32(vmin)); in xnn_f32_spmm_ukernel_16x2__neonfma() 362 vout0c1 = vmax_f32(vout0c1, vget_low_f32(vmin)); in xnn_f32_spmm_ukernel_16x2__neonfma() 385 vout0 = vmax_f32(vout0, vget_low_f32(vmin)); in xnn_f32_spmm_ukernel_16x2__neonfma()
|
D | 4x1-neonfma.c | 78 vout01 = vmax_f32(vout01, vget_low_f32(vmin)); in xnn_f32_spmm_ukernel_4x1__neonfma() 104 vout0 = vmax_f32(vout0, vget_low_f32(vmin)); in xnn_f32_spmm_ukernel_4x1__neonfma()
|
D | 4x1-neonfma-pipelined.c | 83 vout01 = vmax_f32(vout01, vget_low_f32(vmin)); in xnn_f32_spmm_ukernel_4x1__neonfma_pipelined() 109 vout0 = vmax_f32(vout0, vget_low_f32(vmin)); in xnn_f32_spmm_ukernel_4x1__neonfma_pipelined()
|
D | 4x1-neonfma-unroll2.c | 94 vout01 = vmax_f32(vout01, vget_low_f32(vmin)); in xnn_f32_spmm_ukernel_4x1__neonfma_unroll2() 120 vout0 = vmax_f32(vout0, vget_low_f32(vmin)); in xnn_f32_spmm_ukernel_4x1__neonfma_unroll2()
|
D | 8x1-neonfma.c | 110 vout01 = vmax_f32(vout01, vget_low_f32(vmin)); in xnn_f32_spmm_ukernel_8x1__neonfma() 136 vout0 = vmax_f32(vout0, vget_low_f32(vmin)); in xnn_f32_spmm_ukernel_8x1__neonfma()
|
D | 8x1-neonfma-pipelined.c | 116 vout01 = vmax_f32(vout01, vget_low_f32(vmin)); in xnn_f32_spmm_ukernel_8x1__neonfma_pipelined() 142 vout0 = vmax_f32(vout0, vget_low_f32(vmin)); in xnn_f32_spmm_ukernel_8x1__neonfma_pipelined()
|
D | 12x1-neonfma.c | 148 vout01 = vmax_f32(vout01, vget_low_f32(vmin)); in xnn_f32_spmm_ukernel_12x1__neonfma() 174 vout0 = vmax_f32(vout0, vget_low_f32(vmin)); in xnn_f32_spmm_ukernel_12x1__neonfma()
|
D | 8x1-neonfma-unroll2.c | 133 vout01 = vmax_f32(vout01, vget_low_f32(vmin)); in xnn_f32_spmm_ukernel_8x1__neonfma_unroll2() 159 vout0 = vmax_f32(vout0, vget_low_f32(vmin)); in xnn_f32_spmm_ukernel_8x1__neonfma_unroll2()
|
D | 16x1-neonfma.c | 155 vout01 = vmax_f32(vout01, vget_low_f32(vmin)); in xnn_f32_spmm_ukernel_16x1__neonfma() 181 vout0 = vmax_f32(vout0, vget_low_f32(vmin)); in xnn_f32_spmm_ukernel_16x1__neonfma()
|
D | 16x1-neonfma-pipelined.c | 164 vout01 = vmax_f32(vout01, vget_low_f32(vmin)); in xnn_f32_spmm_ukernel_16x1__neonfma_pipelined() 190 vout0 = vmax_f32(vout0, vget_low_f32(vmin)); in xnn_f32_spmm_ukernel_16x1__neonfma_pipelined()
|
D | 16x1-neonfma-unroll2.c | 194 vout01 = vmax_f32(vout01, vget_low_f32(vmin)); in xnn_f32_spmm_ukernel_16x1__neonfma_unroll2() 220 vout0 = vmax_f32(vout0, vget_low_f32(vmin)); in xnn_f32_spmm_ukernel_16x1__neonfma_unroll2()
|
/external/XNNPACK/src/f32-gemm/gen/ |
D | 4x2-neon-lane-ld64.c | 107 vacc0x01 = vmax_f32(vacc0x01, vmin); in xnn_f32_gemm_ukernel_4x2__neon_lane_ld64() 108 vacc1x01 = vmax_f32(vacc1x01, vmin); in xnn_f32_gemm_ukernel_4x2__neon_lane_ld64() 109 vacc2x01 = vmax_f32(vacc2x01, vmin); in xnn_f32_gemm_ukernel_4x2__neon_lane_ld64() 110 vacc3x01 = vmax_f32(vacc3x01, vmin); in xnn_f32_gemm_ukernel_4x2__neon_lane_ld64()
|
D | 4x2-neonfma-lane-ld64.c | 129 vacc0x01 = vmax_f32(vacc0x01, vmin); in xnn_f32_gemm_ukernel_4x2__neonfma_lane_ld64() 130 vacc1x01 = vmax_f32(vacc1x01, vmin); in xnn_f32_gemm_ukernel_4x2__neonfma_lane_ld64() 131 vacc2x01 = vmax_f32(vacc2x01, vmin); in xnn_f32_gemm_ukernel_4x2__neonfma_lane_ld64() 132 vacc3x01 = vmax_f32(vacc3x01, vmin); in xnn_f32_gemm_ukernel_4x2__neonfma_lane_ld64()
|
/external/XNNPACK/src/f32-igemm/gen/ |
D | 4x2-neon-lane-ld64.c | 131 vacc0x01 = vmax_f32(vacc0x01, vmin); in xnn_f32_igemm_ukernel_4x2__neon_lane_ld64() 132 vacc1x01 = vmax_f32(vacc1x01, vmin); in xnn_f32_igemm_ukernel_4x2__neon_lane_ld64() 133 vacc2x01 = vmax_f32(vacc2x01, vmin); in xnn_f32_igemm_ukernel_4x2__neon_lane_ld64() 134 vacc3x01 = vmax_f32(vacc3x01, vmin); in xnn_f32_igemm_ukernel_4x2__neon_lane_ld64()
|
D | 4x2-neonfma-lane-ld64.c | 153 vacc0x01 = vmax_f32(vacc0x01, vmin); in xnn_f32_igemm_ukernel_4x2__neonfma_lane_ld64() 154 vacc1x01 = vmax_f32(vacc1x01, vmin); in xnn_f32_igemm_ukernel_4x2__neonfma_lane_ld64() 155 vacc2x01 = vmax_f32(vacc2x01, vmin); in xnn_f32_igemm_ukernel_4x2__neonfma_lane_ld64() 156 vacc3x01 = vmax_f32(vacc3x01, vmin); in xnn_f32_igemm_ukernel_4x2__neonfma_lane_ld64()
|
/external/XNNPACK/src/f32-rmax/ |
D | neon.c | 45 float32x2_t vmax_lo = vmax_f32(vget_low_f32(vmax), vget_high_f32(vmax)); in xnn_f32_rmax_ukernel__neon() 50 vmax_lo = vmax_f32(vmax_lo, vx); in xnn_f32_rmax_ukernel__neon()
|
/external/XNNPACK/src/f32-gavgpool-spchw/ |
D | neon-x4.c | 119 vout = vmax_f32(vout, vget_low_f32(voutput_min)); in xnn_f32_gavgpool_spchw_ukernel__neon_x4()
|
/external/XNNPACK/src/f32-spmm/ |
D | neon-blocked.c.in | 181 vout${ABC[0:SUBMR]}c${N} = vmax_f32(vout${ABC[0:SUBMR]}c${N}, vget_low_f32(vmin)); 231 vout${ABC[0:SUBMR]} = vmax_f32(vout${ABC[0:SUBMR]}, vget_low_f32(vmin));
|