/external/XNNPACK/src/f32-igemm/gen/
D | 6x2-minmax-neonfma-lane-ld64.c | in xnn_f32_igemm_minmax_ukernel_6x2__neonfma_lane_ld64():
     71  float32x2_t vacc4x01 = vacc0x01;   [local]
    124  vacc4x01 = vfma_lane_f32(vacc4x01, vb01c0, va4, 0);
    137  vacc4x01 = vfma_f32(vacc4x01, va4c0, vb01c0);
    147  vacc4x01 = vfma_lane_f32(vacc4x01, vb01c1, va4, 1);
    160  vacc4x01 = vfma_f32(vacc4x01, va4c1, vb01c1);
    178  vacc4x01 = vfma_f32(vacc4x01, va4, vb01);
    189  vacc4x01 = vmin_f32(vacc4x01, vmax);
    197  vacc4x01 = vmax_f32(vacc4x01, vmin);
    203  vst1_f32(c4, vacc4x01);
    219  vst1_lane_f32(c4, vacc4x01, 0);

D | 6x2-minmax-neon-lane-ld64.c | in xnn_f32_igemm_minmax_ukernel_6x2__neon_lane_ld64():
     71  float32x2_t vacc4x01 = vacc0x01;   [local]
    123  vacc4x01 = vmla_lane_f32(vacc4x01, vb01c0, va4, 0);
    131  vacc4x01 = vmla_lane_f32(vacc4x01, vb01c1, va4, 1);
    148  vacc4x01 = vmla_f32(vacc4x01, va4, vb01);
    159  vacc4x01 = vmin_f32(vacc4x01, vmax);
    167  vacc4x01 = vmax_f32(vacc4x01, vmin);
    173  vst1_f32(c4, vacc4x01);
    189  vst1_lane_f32(c4, vacc4x01, 0);

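Read together, the two 6x2 entries above trace the full life of the row-4 accumulator: it starts as a copy of the row-0 accumulator vacc0x01, is updated once per k-step with a lane multiply-add (fused vfma_* in the neonfma kernel, non-fused vmla_* in the plain neon kernel, with vfma_f32/vmla_f32 used on the non-lane and k-remainder paths), is clamped against the vmin/vmax parameters, and is finally stored either as a full pair or as a single lane. The sketch below shows that shape for one row; it is illustrative only: row4_2col_sketch, a4, w, c4, kc, nc, min and max are placeholder names rather than the kernels' real signature, and it follows the AArch64 neonfma variant (the neon kernels substitute vmla_lane_f32/vmla_f32).

    #include <arm_neon.h>
    #include <stddef.h>

    /* Illustrative sketch (not the XNNPACK kernel): accumulate two output columns
     * for one row, clamp to [min, max], and store.  Assumes an AArch64/FMA target
     * so that vfma_lane_f32 is available. */
    static void row4_2col_sketch(const float* a4, const float* w, size_t kc,
                                 float* c4, size_t nc, float min, float max)
    {
      float32x2_t vacc4x01 = vdup_n_f32(0.0f);  /* the kernels copy this from vacc0x01 instead */
      size_t k = 0;
      for (; k + 2 <= kc; k += 2) {             /* main loop: two k-steps per iteration */
        const float32x2_t va4    = vld1_f32(a4); a4 += 2;  /* row 4, k-steps k and k+1 */
        const float32x2_t vb01c0 = vld1_f32(w);  w  += 2;  /* B columns 0..1, k-step k */
        const float32x2_t vb01c1 = vld1_f32(w);  w  += 2;  /* B columns 0..1, k-step k+1 */
        vacc4x01 = vfma_lane_f32(vacc4x01, vb01c0, va4, 0);
        vacc4x01 = vfma_lane_f32(vacc4x01, vb01c1, va4, 1);
      }
      if (k < kc) {                             /* odd-kc tail, cf. the vfma_f32/vmla_f32 hits */
        const float32x2_t va4  = vld1_dup_f32(a4);
        const float32x2_t vb01 = vld1_f32(w);
        vacc4x01 = vfma_f32(vacc4x01, va4, vb01);
      }
      vacc4x01 = vmin_f32(vacc4x01, vdup_n_f32(max));  /* clamp to the output range */
      vacc4x01 = vmax_f32(vacc4x01, vdup_n_f32(min));
      if (nc >= 2) {
        vst1_f32(c4, vacc4x01);                 /* both columns fit */
      } else {
        vst1_lane_f32(c4, vacc4x01, 0);         /* single-column remainder */
      }
    }
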
D | 6x8-minmax-neonfma-lane-ld64.c | in xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld64():
    249  float32x2_t vacc4x01 = vget_low_f32(vacc4x0123);   [local]
    256  vst1_f32(c4, vacc4x01); c4 += 2;
    263  vacc4x01 = vget_high_f32(vacc4x0123);
    271  vst1_lane_f32(c4, vacc4x01, 0);

D | 6x8-minmax-neon-lane-ld64.c | in xnn_f32_igemm_minmax_ukernel_6x8__neon_lane_ld64():
    249  float32x2_t vacc4x01 = vget_low_f32(vacc4x0123);   [local]
    256  vst1_f32(c4, vacc4x01); c4 += 2;
    263  vacc4x01 = vget_high_f32(vacc4x0123);
    271  vst1_lane_f32(c4, vacc4x01, 0);

D | 6x8-minmax-neonfma-dup-ld64.c | in xnn_f32_igemm_minmax_ukernel_6x8__neonfma_dup_ld64():
    261  float32x2_t vacc4x01 = vget_low_f32(vacc4x0123);   [local]
    268  vst1_f32(c4, vacc4x01); c4 += 2;
    275  vacc4x01 = vget_high_f32(vacc4x0123);
    283  vst1_lane_f32(c4, vacc4x01, 0);

D | 6x8-minmax-neon-dup-ld64.c | in xnn_f32_igemm_minmax_ukernel_6x8__neon_dup_ld64():
    261  float32x2_t vacc4x01 = vget_low_f32(vacc4x0123);   [local]
    268  vst1_f32(c4, vacc4x01); c4 += 2;
    275  vacc4x01 = vget_high_f32(vacc4x0123);
    283  vst1_lane_f32(c4, vacc4x01, 0);

D | 6x8-minmax-neon-lane-ld128.c | in xnn_f32_igemm_minmax_ukernel_6x8__neon_lane_ld128():
    287  float32x2_t vacc4x01 = vget_low_f32(vacc4x0123);   [local]
    294  vst1_f32(c4, vacc4x01); c4 += 2;
    301  vacc4x01 = vget_high_f32(vacc4x0123);
    309  vst1_lane_f32(c4, vacc4x01, 0);

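Unlike the 6x2 kernels, the 6x8 entries above touch vacc4x01 only in the nc-remainder epilogue: the 128-bit row-4 accumulator vacc4x0123 is split into 64-bit halves so that two and then one leftover output column can be stored. Below is a minimal sketch of that store sequence, assuming the row has already been clamped; store_row_tail and its parameters are placeholder names, not the kernels' real structure (the kernels inline this per output row).

    #include <arm_neon.h>
    #include <stddef.h>

    /* Illustrative sketch (not the XNNPACK kernel): write the last nc (< 4) columns
     * of one already-clamped output row and return the advanced output pointer. */
    static float* store_row_tail(float* c4, float32x4_t vacc4x0123, size_t nc)
    {
      float32x2_t vacc4x01 = vget_low_f32(vacc4x0123);   /* columns 0..1 */
      if (nc & 2) {
        vst1_f32(c4, vacc4x01); c4 += 2;                 /* store two columns */
        vacc4x01 = vget_high_f32(vacc4x0123);            /* move on to columns 2..3 */
      }
      if (nc & 1) {
        vst1_lane_f32(c4, vacc4x01, 0); c4 += 1;         /* store the final column */
      }
      return c4;
    }

The 5x8/6x8 gemm, gen-inc, and ppmm entries below show these same four hits, only at shifted source lines; the two 6x2 gemm entries instead repeat the accumulate/clamp/store pattern sketched earlier.
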
/external/XNNPACK/src/f32-gemm/gen/
D | 6x2-minmax-neonfma-lane-ld64.c | in xnn_f32_gemm_minmax_ukernel_6x2__neonfma_lane_ld64():
     77  float32x2_t vacc4x01 = vacc0x01;   [local]
     96  vacc4x01 = vfma_lane_f32(vacc4x01, vb01c0, va4, 0);
    109  vacc4x01 = vfma_f32(vacc4x01, va4c0, vb01c0);
    119  vacc4x01 = vfma_lane_f32(vacc4x01, vb01c1, va4, 1);
    132  vacc4x01 = vfma_f32(vacc4x01, va4c1, vb01c1);
    150  vacc4x01 = vfma_f32(vacc4x01, va4, vb01);
    159  vacc4x01 = vmin_f32(vacc4x01, vmax);
    167  vacc4x01 = vmax_f32(vacc4x01, vmin);
    179  vst1_f32(c4, vacc4x01);
    198  vst1_lane_f32(c4, vacc4x01, 0);

D | 6x2-minmax-neon-lane-ld64.c | in xnn_f32_gemm_minmax_ukernel_6x2__neon_lane_ld64():
     77  float32x2_t vacc4x01 = vacc0x01;   [local]
     95  vacc4x01 = vmla_lane_f32(vacc4x01, vb01c0, va4, 0);
    103  vacc4x01 = vmla_lane_f32(vacc4x01, vb01c1, va4, 1);
    120  vacc4x01 = vmla_f32(vacc4x01, va4, vb01);
    129  vacc4x01 = vmin_f32(vacc4x01, vmax);
    137  vacc4x01 = vmax_f32(vacc4x01, vmin);
    149  vst1_f32(c4, vacc4x01);
    168  vst1_lane_f32(c4, vacc4x01, 0);

D | 5x8-minmax-neon-lane-ld64.c | in xnn_f32_gemm_minmax_ukernel_5x8__neon_lane_ld64():
    197  float32x2_t vacc4x01 = vget_low_f32(vacc4x0123);   [local]
    203  vst1_f32(c4, vacc4x01); c4 += 2;
    209  vacc4x01 = vget_high_f32(vacc4x0123);
    216  vst1_lane_f32(c4, vacc4x01, 0);

D | 5x8-minmax-neonfma-lane-ld64.c | in xnn_f32_gemm_minmax_ukernel_5x8__neonfma_lane_ld64():
    197  float32x2_t vacc4x01 = vget_low_f32(vacc4x0123);   [local]
    203  vst1_f32(c4, vacc4x01); c4 += 2;
    209  vacc4x01 = vget_high_f32(vacc4x0123);
    216  vst1_lane_f32(c4, vacc4x01, 0);

D | 6x8-minmax-neonfma-lane-ld64.c | in xnn_f32_gemm_minmax_ukernel_6x8__neonfma_lane_ld64():
    224  float32x2_t vacc4x01 = vget_low_f32(vacc4x0123);   [local]
    231  vst1_f32(c4, vacc4x01); c4 += 2;
    238  vacc4x01 = vget_high_f32(vacc4x0123);
    246  vst1_lane_f32(c4, vacc4x01, 0);

D | 6x8-minmax-neon-lane-ld64.c | in xnn_f32_gemm_minmax_ukernel_6x8__neon_lane_ld64():
    224  float32x2_t vacc4x01 = vget_low_f32(vacc4x0123);   [local]
    231  vst1_f32(c4, vacc4x01); c4 += 2;
    238  vacc4x01 = vget_high_f32(vacc4x0123);
    246  vst1_lane_f32(c4, vacc4x01, 0);

D | 6x8-minmax-neonfma-dup-ld64.c | in xnn_f32_gemm_minmax_ukernel_6x8__neonfma_dup_ld64():
    236  float32x2_t vacc4x01 = vget_low_f32(vacc4x0123);   [local]
    243  vst1_f32(c4, vacc4x01); c4 += 2;
    250  vacc4x01 = vget_high_f32(vacc4x0123);
    258  vst1_lane_f32(c4, vacc4x01, 0);

D | 6x8-minmax-neon-dup-ld64.c | in xnn_f32_gemm_minmax_ukernel_6x8__neon_dup_ld64():
    236  float32x2_t vacc4x01 = vget_low_f32(vacc4x0123);   [local]
    243  vst1_f32(c4, vacc4x01); c4 += 2;
    250  vacc4x01 = vget_high_f32(vacc4x0123);
    258  vst1_lane_f32(c4, vacc4x01, 0);

D | 6x8-minmax-neon-lane-ld128.c | in xnn_f32_gemm_minmax_ukernel_6x8__neon_lane_ld128():
    262  float32x2_t vacc4x01 = vget_low_f32(vacc4x0123);   [local]
    269  vst1_f32(c4, vacc4x01); c4 += 2;
    276  vacc4x01 = vget_high_f32(vacc4x0123);
    284  vst1_lane_f32(c4, vacc4x01, 0);

/external/XNNPACK/src/f32-gemm/gen-inc/
D | 5x8inc-minmax-neonfma-lane-ld64.c | in xnn_f32_gemminc_minmax_ukernel_5x8__neonfma_lane_ld64():
    199  float32x2_t vacc4x01 = vget_low_f32(vacc4x0123);   [local]
    205  vst1_f32(c4, vacc4x01); c4 += 2;
    211  vacc4x01 = vget_high_f32(vacc4x0123);
    218  vst1_lane_f32(c4, vacc4x01, 0);

D | 5x8inc-minmax-neon-lane-ld64.c | in xnn_f32_gemminc_minmax_ukernel_5x8__neon_lane_ld64():
    199  float32x2_t vacc4x01 = vget_low_f32(vacc4x0123);   [local]
    205  vst1_f32(c4, vacc4x01); c4 += 2;
    211  vacc4x01 = vget_high_f32(vacc4x0123);
    218  vst1_lane_f32(c4, vacc4x01, 0);

D | 6x8inc-minmax-neonfma-lane-ld64.c | in xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_lane_ld64():
    226  float32x2_t vacc4x01 = vget_low_f32(vacc4x0123);   [local]
    233  vst1_f32(c4, vacc4x01); c4 += 2;
    240  vacc4x01 = vget_high_f32(vacc4x0123);
    248  vst1_lane_f32(c4, vacc4x01, 0);

D | 6x8inc-minmax-neon-lane-ld64.c | in xnn_f32_gemminc_minmax_ukernel_6x8__neon_lane_ld64():
    226  float32x2_t vacc4x01 = vget_low_f32(vacc4x0123);   [local]
    233  vst1_f32(c4, vacc4x01); c4 += 2;
    240  vacc4x01 = vget_high_f32(vacc4x0123);
    248  vst1_lane_f32(c4, vacc4x01, 0);

D | 6x8inc-minmax-neon-dup-ld64.c | in xnn_f32_gemminc_minmax_ukernel_6x8__neon_dup_ld64():
    238  float32x2_t vacc4x01 = vget_low_f32(vacc4x0123);   [local]
    245  vst1_f32(c4, vacc4x01); c4 += 2;
    252  vacc4x01 = vget_high_f32(vacc4x0123);
    260  vst1_lane_f32(c4, vacc4x01, 0);

D | 6x8inc-minmax-neonfma-dup-ld64.c | in xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_dup_ld64():
    238  float32x2_t vacc4x01 = vget_low_f32(vacc4x0123);   [local]
    245  vst1_f32(c4, vacc4x01); c4 += 2;
    252  vacc4x01 = vget_high_f32(vacc4x0123);
    260  vst1_lane_f32(c4, vacc4x01, 0);

D | 6x8inc-minmax-neonfma-lane-ld128.c | in xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_lane_ld128():
    264  float32x2_t vacc4x01 = vget_low_f32(vacc4x0123);   [local]
    271  vst1_f32(c4, vacc4x01); c4 += 2;
    278  vacc4x01 = vget_high_f32(vacc4x0123);
    286  vst1_lane_f32(c4, vacc4x01, 0);

/external/XNNPACK/src/f32-ppmm/gen/
D | 8x8-minmax-neon.c | in xnn_f32_ppmm_minmax_ukernel_8x8__neon():
    199  float32x2_t vacc4x01 = vget_low_f32(vacc4x0123);   [local]
    208  vst1_f32(c4, vacc4x01); c4 += 2;
    217  vacc4x01 = vget_high_f32(vacc4x0123);
    227  vst1_lane_f32(c4, vacc4x01, 0);

D | 8x8-minmax-neonfma.c | in xnn_f32_ppmm_minmax_ukernel_8x8__neonfma():
    227  float32x2_t vacc4x01 = vget_low_f32(vacc4x0123);   [local]
    236  vst1_f32(c4, vacc4x01); c4 += 2;
    245  vacc4x01 = vget_high_f32(vacc4x0123);
    255  vst1_lane_f32(c4, vacc4x01, 0);
