/external/XNNPACK/src/f32-gemm/gen/ |
D | 5x8-neon-lane-ld64.c | 197 float32x2_t vacc4x01 = vget_low_f32(vacc4x0123); in xnn_f32_gemm_ukernel_5x8__neon_lane_ld64() local 203 vst1_f32(c4, vacc4x01); c4 += 2; in xnn_f32_gemm_ukernel_5x8__neon_lane_ld64() 209 vacc4x01 = vget_high_f32(vacc4x0123); in xnn_f32_gemm_ukernel_5x8__neon_lane_ld64() 216 vst1_lane_f32(c4, vacc4x01, 0); in xnn_f32_gemm_ukernel_5x8__neon_lane_ld64()
|
D | 5x8-neonfma-lane-ld64.c | 197 float32x2_t vacc4x01 = vget_low_f32(vacc4x0123); in xnn_f32_gemm_ukernel_5x8__neonfma_lane_ld64() local 203 vst1_f32(c4, vacc4x01); c4 += 2; in xnn_f32_gemm_ukernel_5x8__neonfma_lane_ld64() 209 vacc4x01 = vget_high_f32(vacc4x0123); in xnn_f32_gemm_ukernel_5x8__neonfma_lane_ld64() 216 vst1_lane_f32(c4, vacc4x01, 0); in xnn_f32_gemm_ukernel_5x8__neonfma_lane_ld64()
|
D | 6x8-neonfma-lane-ld64.c | 224 float32x2_t vacc4x01 = vget_low_f32(vacc4x0123); in xnn_f32_gemm_ukernel_6x8__neonfma_lane_ld64() local 231 vst1_f32(c4, vacc4x01); c4 += 2; in xnn_f32_gemm_ukernel_6x8__neonfma_lane_ld64() 238 vacc4x01 = vget_high_f32(vacc4x0123); in xnn_f32_gemm_ukernel_6x8__neonfma_lane_ld64() 246 vst1_lane_f32(c4, vacc4x01, 0); in xnn_f32_gemm_ukernel_6x8__neonfma_lane_ld64()
|
D | 6x8-neon-lane-ld64.c | 224 float32x2_t vacc4x01 = vget_low_f32(vacc4x0123); in xnn_f32_gemm_ukernel_6x8__neon_lane_ld64() local 231 vst1_f32(c4, vacc4x01); c4 += 2; in xnn_f32_gemm_ukernel_6x8__neon_lane_ld64() 238 vacc4x01 = vget_high_f32(vacc4x0123); in xnn_f32_gemm_ukernel_6x8__neon_lane_ld64() 246 vst1_lane_f32(c4, vacc4x01, 0); in xnn_f32_gemm_ukernel_6x8__neon_lane_ld64()
|
D | 6x8-neonfma-dup-ld64.c | 236 float32x2_t vacc4x01 = vget_low_f32(vacc4x0123); in xnn_f32_gemm_ukernel_6x8__neonfma_dup_ld64() local 243 vst1_f32(c4, vacc4x01); c4 += 2; in xnn_f32_gemm_ukernel_6x8__neonfma_dup_ld64() 250 vacc4x01 = vget_high_f32(vacc4x0123); in xnn_f32_gemm_ukernel_6x8__neonfma_dup_ld64() 258 vst1_lane_f32(c4, vacc4x01, 0); in xnn_f32_gemm_ukernel_6x8__neonfma_dup_ld64()
|
D | 6x8-neon-dup-ld64.c | 236 float32x2_t vacc4x01 = vget_low_f32(vacc4x0123); in xnn_f32_gemm_ukernel_6x8__neon_dup_ld64() local 243 vst1_f32(c4, vacc4x01); c4 += 2; in xnn_f32_gemm_ukernel_6x8__neon_dup_ld64() 250 vacc4x01 = vget_high_f32(vacc4x0123); in xnn_f32_gemm_ukernel_6x8__neon_dup_ld64() 258 vst1_lane_f32(c4, vacc4x01, 0); in xnn_f32_gemm_ukernel_6x8__neon_dup_ld64()
|
D | 6x8s4-neon.c | 287 float32x2_t vacc4x01 = vget_low_f32(vacc4x0123); in xnn_f32_gemm_ukernel_6x8s4__neon() local 294 vst1_f32(c4, vacc4x01); c4 += 2; in xnn_f32_gemm_ukernel_6x8s4__neon() 301 vacc4x01 = vget_high_f32(vacc4x0123); in xnn_f32_gemm_ukernel_6x8s4__neon() 309 vst1_lane_f32(c4, vacc4x01, 0); in xnn_f32_gemm_ukernel_6x8s4__neon()
|
D | 6x8s4-neonfma.c | 287 float32x2_t vacc4x01 = vget_low_f32(vacc4x0123); in xnn_f32_gemm_ukernel_6x8s4__neonfma() local 294 vst1_f32(c4, vacc4x01); c4 += 2; in xnn_f32_gemm_ukernel_6x8s4__neonfma() 301 vacc4x01 = vget_high_f32(vacc4x0123); in xnn_f32_gemm_ukernel_6x8s4__neonfma() 309 vst1_lane_f32(c4, vacc4x01, 0); in xnn_f32_gemm_ukernel_6x8s4__neonfma()
|
/external/XNNPACK/src/f32-ppmm/gen/ |
D | 8x8-neon.c | 199 float32x2_t vacc4x01 = vget_low_f32(vacc4x0123); in xnn_f32_ppmm_ukernel_8x8__neon() local 208 vst1_f32(c4, vacc4x01); c4 += 2; in xnn_f32_ppmm_ukernel_8x8__neon() 217 vacc4x01 = vget_high_f32(vacc4x0123); in xnn_f32_ppmm_ukernel_8x8__neon() 227 vst1_lane_f32(c4, vacc4x01, 0); in xnn_f32_ppmm_ukernel_8x8__neon()
|
D | 8x8-neonfma.c | 227 float32x2_t vacc4x01 = vget_low_f32(vacc4x0123); in xnn_f32_ppmm_ukernel_8x8__neonfma() local 236 vst1_f32(c4, vacc4x01); c4 += 2; in xnn_f32_ppmm_ukernel_8x8__neonfma() 245 vacc4x01 = vget_high_f32(vacc4x0123); in xnn_f32_ppmm_ukernel_8x8__neonfma() 255 vst1_lane_f32(c4, vacc4x01, 0); in xnn_f32_ppmm_ukernel_8x8__neonfma()
|
/external/XNNPACK/src/f32-gemm/gen-inc/ |
D | 5x8-neonfma-lane-ld64.c | 199 float32x2_t vacc4x01 = vget_low_f32(vacc4x0123); in xnn_f32_gemminc_ukernel_5x8__neonfma_lane_ld64() local 205 vst1_f32(c4, vacc4x01); c4 += 2; in xnn_f32_gemminc_ukernel_5x8__neonfma_lane_ld64() 211 vacc4x01 = vget_high_f32(vacc4x0123); in xnn_f32_gemminc_ukernel_5x8__neonfma_lane_ld64() 218 vst1_lane_f32(c4, vacc4x01, 0); in xnn_f32_gemminc_ukernel_5x8__neonfma_lane_ld64()
|
D | 5x8-neon-lane-ld64.c | 199 float32x2_t vacc4x01 = vget_low_f32(vacc4x0123); in xnn_f32_gemminc_ukernel_5x8__neon_lane_ld64() local 205 vst1_f32(c4, vacc4x01); c4 += 2; in xnn_f32_gemminc_ukernel_5x8__neon_lane_ld64() 211 vacc4x01 = vget_high_f32(vacc4x0123); in xnn_f32_gemminc_ukernel_5x8__neon_lane_ld64() 218 vst1_lane_f32(c4, vacc4x01, 0); in xnn_f32_gemminc_ukernel_5x8__neon_lane_ld64()
|
D | 6x8-neon-lane-ld64.c | 226 float32x2_t vacc4x01 = vget_low_f32(vacc4x0123); in xnn_f32_gemminc_ukernel_6x8__neon_lane_ld64() local 233 vst1_f32(c4, vacc4x01); c4 += 2; in xnn_f32_gemminc_ukernel_6x8__neon_lane_ld64() 240 vacc4x01 = vget_high_f32(vacc4x0123); in xnn_f32_gemminc_ukernel_6x8__neon_lane_ld64() 248 vst1_lane_f32(c4, vacc4x01, 0); in xnn_f32_gemminc_ukernel_6x8__neon_lane_ld64()
|
D | 6x8-neonfma-lane-ld64.c | 226 float32x2_t vacc4x01 = vget_low_f32(vacc4x0123); in xnn_f32_gemminc_ukernel_6x8__neonfma_lane_ld64() local 233 vst1_f32(c4, vacc4x01); c4 += 2; in xnn_f32_gemminc_ukernel_6x8__neonfma_lane_ld64() 240 vacc4x01 = vget_high_f32(vacc4x0123); in xnn_f32_gemminc_ukernel_6x8__neonfma_lane_ld64() 248 vst1_lane_f32(c4, vacc4x01, 0); in xnn_f32_gemminc_ukernel_6x8__neonfma_lane_ld64()
|
D | 6x8-neon-dup-ld64.c | 238 float32x2_t vacc4x01 = vget_low_f32(vacc4x0123); in xnn_f32_gemminc_ukernel_6x8__neon_dup_ld64() local 245 vst1_f32(c4, vacc4x01); c4 += 2; in xnn_f32_gemminc_ukernel_6x8__neon_dup_ld64() 252 vacc4x01 = vget_high_f32(vacc4x0123); in xnn_f32_gemminc_ukernel_6x8__neon_dup_ld64() 260 vst1_lane_f32(c4, vacc4x01, 0); in xnn_f32_gemminc_ukernel_6x8__neon_dup_ld64()
|
D | 6x8-neonfma-dup-ld64.c | 238 float32x2_t vacc4x01 = vget_low_f32(vacc4x0123); in xnn_f32_gemminc_ukernel_6x8__neonfma_dup_ld64() local 245 vst1_f32(c4, vacc4x01); c4 += 2; in xnn_f32_gemminc_ukernel_6x8__neonfma_dup_ld64() 252 vacc4x01 = vget_high_f32(vacc4x0123); in xnn_f32_gemminc_ukernel_6x8__neonfma_dup_ld64() 260 vst1_lane_f32(c4, vacc4x01, 0); in xnn_f32_gemminc_ukernel_6x8__neonfma_dup_ld64()
|
D | 6x8-neonfma-lane-ld128.c | 264 float32x2_t vacc4x01 = vget_low_f32(vacc4x0123); in xnn_f32_gemminc_ukernel_6x8__neonfma_lane_ld128() local 271 vst1_f32(c4, vacc4x01); c4 += 2; in xnn_f32_gemminc_ukernel_6x8__neonfma_lane_ld128() 278 vacc4x01 = vget_high_f32(vacc4x0123); in xnn_f32_gemminc_ukernel_6x8__neonfma_lane_ld128() 286 vst1_lane_f32(c4, vacc4x01, 0); in xnn_f32_gemminc_ukernel_6x8__neonfma_lane_ld128()
|
D | 6x8s4-neonfma.c | 289 float32x2_t vacc4x01 = vget_low_f32(vacc4x0123); in xnn_f32_gemminc_ukernel_6x8s4__neonfma() local 296 vst1_f32(c4, vacc4x01); c4 += 2; in xnn_f32_gemminc_ukernel_6x8s4__neonfma() 303 vacc4x01 = vget_high_f32(vacc4x0123); in xnn_f32_gemminc_ukernel_6x8s4__neonfma() 311 vst1_lane_f32(c4, vacc4x01, 0); in xnn_f32_gemminc_ukernel_6x8s4__neonfma()
|
D | 6x8-neon-lane-ld128.c | 264 float32x2_t vacc4x01 = vget_low_f32(vacc4x0123); in xnn_f32_gemminc_ukernel_6x8__neon_lane_ld128() local 271 vst1_f32(c4, vacc4x01); c4 += 2; in xnn_f32_gemminc_ukernel_6x8__neon_lane_ld128() 278 vacc4x01 = vget_high_f32(vacc4x0123); in xnn_f32_gemminc_ukernel_6x8__neon_lane_ld128() 286 vst1_lane_f32(c4, vacc4x01, 0); in xnn_f32_gemminc_ukernel_6x8__neon_lane_ld128()
|
/external/XNNPACK/src/f32-igemm/gen/ |
D | 6x8-neon-lane-ld64.c | 249 float32x2_t vacc4x01 = vget_low_f32(vacc4x0123); in xnn_f32_igemm_ukernel_6x8__neon_lane_ld64() local 256 vst1_f32(c4, vacc4x01); c4 += 2; in xnn_f32_igemm_ukernel_6x8__neon_lane_ld64() 263 vacc4x01 = vget_high_f32(vacc4x0123); in xnn_f32_igemm_ukernel_6x8__neon_lane_ld64() 271 vst1_lane_f32(c4, vacc4x01, 0); in xnn_f32_igemm_ukernel_6x8__neon_lane_ld64()
|
D | 6x8-neonfma-lane-ld64.c | 249 float32x2_t vacc4x01 = vget_low_f32(vacc4x0123); in xnn_f32_igemm_ukernel_6x8__neonfma_lane_ld64() local 256 vst1_f32(c4, vacc4x01); c4 += 2; in xnn_f32_igemm_ukernel_6x8__neonfma_lane_ld64() 263 vacc4x01 = vget_high_f32(vacc4x0123); in xnn_f32_igemm_ukernel_6x8__neonfma_lane_ld64() 271 vst1_lane_f32(c4, vacc4x01, 0); in xnn_f32_igemm_ukernel_6x8__neonfma_lane_ld64()
|
D | 6x8-neonfma-dup-ld64.c | 261 float32x2_t vacc4x01 = vget_low_f32(vacc4x0123); in xnn_f32_igemm_ukernel_6x8__neonfma_dup_ld64() local 268 vst1_f32(c4, vacc4x01); c4 += 2; in xnn_f32_igemm_ukernel_6x8__neonfma_dup_ld64() 275 vacc4x01 = vget_high_f32(vacc4x0123); in xnn_f32_igemm_ukernel_6x8__neonfma_dup_ld64() 283 vst1_lane_f32(c4, vacc4x01, 0); in xnn_f32_igemm_ukernel_6x8__neonfma_dup_ld64()
|
D | 6x8-neon-dup-ld64.c | 261 float32x2_t vacc4x01 = vget_low_f32(vacc4x0123); in xnn_f32_igemm_ukernel_6x8__neon_dup_ld64() local 268 vst1_f32(c4, vacc4x01); c4 += 2; in xnn_f32_igemm_ukernel_6x8__neon_dup_ld64() 275 vacc4x01 = vget_high_f32(vacc4x0123); in xnn_f32_igemm_ukernel_6x8__neon_dup_ld64() 283 vst1_lane_f32(c4, vacc4x01, 0); in xnn_f32_igemm_ukernel_6x8__neon_dup_ld64()
|
D | 6x8-neonfma-lane-ld128.c | 287 float32x2_t vacc4x01 = vget_low_f32(vacc4x0123); in xnn_f32_igemm_ukernel_6x8__neonfma_lane_ld128() local 294 vst1_f32(c4, vacc4x01); c4 += 2; in xnn_f32_igemm_ukernel_6x8__neonfma_lane_ld128() 301 vacc4x01 = vget_high_f32(vacc4x0123); in xnn_f32_igemm_ukernel_6x8__neonfma_lane_ld128() 309 vst1_lane_f32(c4, vacc4x01, 0); in xnn_f32_igemm_ukernel_6x8__neonfma_lane_ld128()
|
D | 6x8-neon-lane-ld128.c | 287 float32x2_t vacc4x01 = vget_low_f32(vacc4x0123); in xnn_f32_igemm_ukernel_6x8__neon_lane_ld128() local 294 vst1_f32(c4, vacc4x01); c4 += 2; in xnn_f32_igemm_ukernel_6x8__neon_lane_ld128() 301 vacc4x01 = vget_high_f32(vacc4x0123); in xnn_f32_igemm_ukernel_6x8__neon_lane_ld128() 309 vst1_lane_f32(c4, vacc4x01, 0); in xnn_f32_igemm_ukernel_6x8__neon_lane_ld128()
|