/external/XNNPACK/src/f32-gemm/gen/ |
D | 4x2-neonfma-lane-ld64.c | 64 float32x2_t vacc3x01 = vacc0x01; in xnn_f32_gemm_ukernel_4x2__neonfma_lane_ld64() local 79 vacc3x01 = vfma_lane_f32(vacc3x01, vb01c0, va3, 0); in xnn_f32_gemm_ukernel_4x2__neonfma_lane_ld64() 88 vacc3x01 = vfma_f32(vacc3x01, va3c0, vb01c0); in xnn_f32_gemm_ukernel_4x2__neonfma_lane_ld64() 96 vacc3x01 = vfma_lane_f32(vacc3x01, vb01c1, va3, 1); in xnn_f32_gemm_ukernel_4x2__neonfma_lane_ld64() 105 vacc3x01 = vfma_f32(vacc3x01, va3c1, vb01c1); in xnn_f32_gemm_ukernel_4x2__neonfma_lane_ld64() 119 vacc3x01 = vfma_f32(vacc3x01, va3, vb01); in xnn_f32_gemm_ukernel_4x2__neonfma_lane_ld64() 126 vacc3x01 = vmin_f32(vacc3x01, vmax); in xnn_f32_gemm_ukernel_4x2__neonfma_lane_ld64() 132 vacc3x01 = vmax_f32(vacc3x01, vmin); in xnn_f32_gemm_ukernel_4x2__neonfma_lane_ld64() 141 vst1_f32(c3, vacc3x01); in xnn_f32_gemm_ukernel_4x2__neonfma_lane_ld64() 155 vst1_lane_f32(c3, vacc3x01, 0); in xnn_f32_gemm_ukernel_4x2__neonfma_lane_ld64()
|
D | 4x2-neon-lane-ld64.c | 64 float32x2_t vacc3x01 = vacc0x01; in xnn_f32_gemm_ukernel_4x2__neon_lane_ld64() local 78 vacc3x01 = vmla_lane_f32(vacc3x01, vb01c0, va3, 0); in xnn_f32_gemm_ukernel_4x2__neon_lane_ld64() 84 vacc3x01 = vmla_lane_f32(vacc3x01, vb01c1, va3, 1); in xnn_f32_gemm_ukernel_4x2__neon_lane_ld64() 97 vacc3x01 = vmla_f32(vacc3x01, va3, vb01); in xnn_f32_gemm_ukernel_4x2__neon_lane_ld64() 104 vacc3x01 = vmin_f32(vacc3x01, vmax); in xnn_f32_gemm_ukernel_4x2__neon_lane_ld64() 110 vacc3x01 = vmax_f32(vacc3x01, vmin); in xnn_f32_gemm_ukernel_4x2__neon_lane_ld64() 119 vst1_f32(c3, vacc3x01); in xnn_f32_gemm_ukernel_4x2__neon_lane_ld64() 133 vst1_lane_f32(c3, vacc3x01, 0); in xnn_f32_gemm_ukernel_4x2__neon_lane_ld64()
|
D | 4x8-neon-lane-ld64.c | 171 float32x2_t vacc3x01 = vget_low_f32(vacc3x0123); in xnn_f32_gemm_ukernel_4x8__neon_lane_ld64() local 176 vst1_f32(c3, vacc3x01); c3 += 2; in xnn_f32_gemm_ukernel_4x8__neon_lane_ld64() 181 vacc3x01 = vget_high_f32(vacc3x0123); in xnn_f32_gemm_ukernel_4x8__neon_lane_ld64() 187 vst1_lane_f32(c3, vacc3x01, 0); in xnn_f32_gemm_ukernel_4x8__neon_lane_ld64()
|
D | 4x8-neonfma-lane-ld64.c | 171 float32x2_t vacc3x01 = vget_low_f32(vacc3x0123); in xnn_f32_gemm_ukernel_4x8__neonfma_lane_ld64() local 176 vst1_f32(c3, vacc3x01); c3 += 2; in xnn_f32_gemm_ukernel_4x8__neonfma_lane_ld64() 181 vacc3x01 = vget_high_f32(vacc3x0123); in xnn_f32_gemm_ukernel_4x8__neonfma_lane_ld64() 187 vst1_lane_f32(c3, vacc3x01, 0); in xnn_f32_gemm_ukernel_4x8__neonfma_lane_ld64()
|
D | 4x8-neon-dup-ld64.c | 179 float32x2_t vacc3x01 = vget_low_f32(vacc3x0123); in xnn_f32_gemm_ukernel_4x8__neon_dup_ld64() local 184 vst1_f32(c3, vacc3x01); c3 += 2; in xnn_f32_gemm_ukernel_4x8__neon_dup_ld64() 189 vacc3x01 = vget_high_f32(vacc3x0123); in xnn_f32_gemm_ukernel_4x8__neon_dup_ld64() 195 vst1_lane_f32(c3, vacc3x01, 0); in xnn_f32_gemm_ukernel_4x8__neon_dup_ld64()
|
D | 4x8-neonfma-dup-ld64.c | 179 float32x2_t vacc3x01 = vget_low_f32(vacc3x0123); in xnn_f32_gemm_ukernel_4x8__neonfma_dup_ld64() local 184 vst1_f32(c3, vacc3x01); c3 += 2; in xnn_f32_gemm_ukernel_4x8__neonfma_dup_ld64() 189 vacc3x01 = vget_high_f32(vacc3x0123); in xnn_f32_gemm_ukernel_4x8__neonfma_dup_ld64() 195 vst1_lane_f32(c3, vacc3x01, 0); in xnn_f32_gemm_ukernel_4x8__neonfma_dup_ld64()
|
D | 5x8-neon-lane-ld64.c | 198 float32x2_t vacc3x01 = vget_low_f32(vacc3x0123); in xnn_f32_gemm_ukernel_5x8__neon_lane_ld64() local 204 vst1_f32(c3, vacc3x01); c3 += 2; in xnn_f32_gemm_ukernel_5x8__neon_lane_ld64() 210 vacc3x01 = vget_high_f32(vacc3x0123); in xnn_f32_gemm_ukernel_5x8__neon_lane_ld64() 217 vst1_lane_f32(c3, vacc3x01, 0); in xnn_f32_gemm_ukernel_5x8__neon_lane_ld64()
|
D | 4x8-neon-lane-ld128.c | 201 float32x2_t vacc3x01 = vget_low_f32(vacc3x0123); in xnn_f32_gemm_ukernel_4x8__neon_lane_ld128() local 206 vst1_f32(c3, vacc3x01); c3 += 2; in xnn_f32_gemm_ukernel_4x8__neon_lane_ld128() 211 vacc3x01 = vget_high_f32(vacc3x0123); in xnn_f32_gemm_ukernel_4x8__neon_lane_ld128() 217 vst1_lane_f32(c3, vacc3x01, 0); in xnn_f32_gemm_ukernel_4x8__neon_lane_ld128()
|
D | 4x8-neonfma-lane-ld128.c | 201 float32x2_t vacc3x01 = vget_low_f32(vacc3x0123); in xnn_f32_gemm_ukernel_4x8__neonfma_lane_ld128() local 206 vst1_f32(c3, vacc3x01); c3 += 2; in xnn_f32_gemm_ukernel_4x8__neonfma_lane_ld128() 211 vacc3x01 = vget_high_f32(vacc3x0123); in xnn_f32_gemm_ukernel_4x8__neonfma_lane_ld128() 217 vst1_lane_f32(c3, vacc3x01, 0); in xnn_f32_gemm_ukernel_4x8__neonfma_lane_ld128()
|
/external/XNNPACK/src/f32-igemm/gen/ |
D | 4x2-neonfma-lane-ld64.c | 62 float32x2_t vacc3x01 = vacc0x01; in xnn_f32_igemm_ukernel_4x2__neonfma_lane_ld64() local 101 vacc3x01 = vfma_lane_f32(vacc3x01, vb01c0, va3, 0); in xnn_f32_igemm_ukernel_4x2__neonfma_lane_ld64() 110 vacc3x01 = vfma_f32(vacc3x01, va3c0, vb01c0); in xnn_f32_igemm_ukernel_4x2__neonfma_lane_ld64() 118 vacc3x01 = vfma_lane_f32(vacc3x01, vb01c1, va3, 1); in xnn_f32_igemm_ukernel_4x2__neonfma_lane_ld64() 127 vacc3x01 = vfma_f32(vacc3x01, va3c1, vb01c1); in xnn_f32_igemm_ukernel_4x2__neonfma_lane_ld64() 141 vacc3x01 = vfma_f32(vacc3x01, va3, vb01); in xnn_f32_igemm_ukernel_4x2__neonfma_lane_ld64() 150 vacc3x01 = vmin_f32(vacc3x01, vmax); in xnn_f32_igemm_ukernel_4x2__neonfma_lane_ld64() 156 vacc3x01 = vmax_f32(vacc3x01, vmin); in xnn_f32_igemm_ukernel_4x2__neonfma_lane_ld64() 159 vst1_f32(c3, vacc3x01); in xnn_f32_igemm_ukernel_4x2__neonfma_lane_ld64() 172 vst1_lane_f32(c3, vacc3x01, 0); in xnn_f32_igemm_ukernel_4x2__neonfma_lane_ld64()
|
D | 4x2-neon-lane-ld64.c | 62 float32x2_t vacc3x01 = vacc0x01; in xnn_f32_igemm_ukernel_4x2__neon_lane_ld64() local 100 vacc3x01 = vmla_lane_f32(vacc3x01, vb01c0, va3, 0); in xnn_f32_igemm_ukernel_4x2__neon_lane_ld64() 106 vacc3x01 = vmla_lane_f32(vacc3x01, vb01c1, va3, 1); in xnn_f32_igemm_ukernel_4x2__neon_lane_ld64() 119 vacc3x01 = vmla_f32(vacc3x01, va3, vb01); in xnn_f32_igemm_ukernel_4x2__neon_lane_ld64() 128 vacc3x01 = vmin_f32(vacc3x01, vmax); in xnn_f32_igemm_ukernel_4x2__neon_lane_ld64() 134 vacc3x01 = vmax_f32(vacc3x01, vmin); in xnn_f32_igemm_ukernel_4x2__neon_lane_ld64() 137 vst1_f32(c3, vacc3x01); in xnn_f32_igemm_ukernel_4x2__neon_lane_ld64() 150 vst1_lane_f32(c3, vacc3x01, 0); in xnn_f32_igemm_ukernel_4x2__neon_lane_ld64()
|
D | 4x4-neon-lane-ld64.c | 150 float32x2_t vacc3x01 = vget_low_f32(vacc3x0123); in xnn_f32_igemm_ukernel_4x4__neon_lane_ld64() local 155 vst1_f32(c3, vacc3x01); c3 += 2; in xnn_f32_igemm_ukernel_4x4__neon_lane_ld64() 160 vacc3x01 = vget_high_f32(vacc3x0123); in xnn_f32_igemm_ukernel_4x4__neon_lane_ld64() 166 vst1_lane_f32(c3, vacc3x01, 0); in xnn_f32_igemm_ukernel_4x4__neon_lane_ld64()
|
D | 4x4-neonfma-lane-ld64.c | 150 float32x2_t vacc3x01 = vget_low_f32(vacc3x0123); in xnn_f32_igemm_ukernel_4x4__neonfma_lane_ld64() local 155 vst1_f32(c3, vacc3x01); c3 += 2; in xnn_f32_igemm_ukernel_4x4__neonfma_lane_ld64() 160 vacc3x01 = vget_high_f32(vacc3x0123); in xnn_f32_igemm_ukernel_4x4__neonfma_lane_ld64() 166 vst1_lane_f32(c3, vacc3x01, 0); in xnn_f32_igemm_ukernel_4x4__neonfma_lane_ld64()
|
D | 4x8-neon-dup-ld64.c | 200 float32x2_t vacc3x01 = vget_low_f32(vacc3x0123); in xnn_f32_igemm_ukernel_4x8__neon_dup_ld64() local 205 vst1_f32(c3, vacc3x01); c3 += 2; in xnn_f32_igemm_ukernel_4x8__neon_dup_ld64() 210 vacc3x01 = vget_high_f32(vacc3x0123); in xnn_f32_igemm_ukernel_4x8__neon_dup_ld64() 216 vst1_lane_f32(c3, vacc3x01, 0); in xnn_f32_igemm_ukernel_4x8__neon_dup_ld64()
|
D | 4x8-neonfma-dup-ld64.c | 200 float32x2_t vacc3x01 = vget_low_f32(vacc3x0123); in xnn_f32_igemm_ukernel_4x8__neonfma_dup_ld64() local 205 vst1_f32(c3, vacc3x01); c3 += 2; in xnn_f32_igemm_ukernel_4x8__neonfma_dup_ld64() 210 vacc3x01 = vget_high_f32(vacc3x0123); in xnn_f32_igemm_ukernel_4x8__neonfma_dup_ld64() 216 vst1_lane_f32(c3, vacc3x01, 0); in xnn_f32_igemm_ukernel_4x8__neonfma_dup_ld64()
|
D | 4x8-neon-lane-ld64.c | 192 float32x2_t vacc3x01 = vget_low_f32(vacc3x0123); in xnn_f32_igemm_ukernel_4x8__neon_lane_ld64() local 197 vst1_f32(c3, vacc3x01); c3 += 2; in xnn_f32_igemm_ukernel_4x8__neon_lane_ld64() 202 vacc3x01 = vget_high_f32(vacc3x0123); in xnn_f32_igemm_ukernel_4x8__neon_lane_ld64() 208 vst1_lane_f32(c3, vacc3x01, 0); in xnn_f32_igemm_ukernel_4x8__neon_lane_ld64()
|
D | 4x8-neonfma-lane-ld64.c | 192 float32x2_t vacc3x01 = vget_low_f32(vacc3x0123); in xnn_f32_igemm_ukernel_4x8__neonfma_lane_ld64() local 197 vst1_f32(c3, vacc3x01); c3 += 2; in xnn_f32_igemm_ukernel_4x8__neonfma_lane_ld64() 202 vacc3x01 = vget_high_f32(vacc3x0123); in xnn_f32_igemm_ukernel_4x8__neonfma_lane_ld64() 208 vst1_lane_f32(c3, vacc3x01, 0); in xnn_f32_igemm_ukernel_4x8__neonfma_lane_ld64()
|
/external/XNNPACK/src/f32-ppmm/gen/ |
D | 4x8-neon.c | 127 float32x2_t vacc3x01 = vget_low_f32(vacc3x0123); in xnn_f32_ppmm_ukernel_4x8__neon() local 132 vst1_f32(c3, vacc3x01); c3 += 2; in xnn_f32_ppmm_ukernel_4x8__neon() 137 vacc3x01 = vget_high_f32(vacc3x0123); in xnn_f32_ppmm_ukernel_4x8__neon() 143 vst1_lane_f32(c3, vacc3x01, 0); in xnn_f32_ppmm_ukernel_4x8__neon()
|
D | 4x8-neonfma.c | 143 float32x2_t vacc3x01 = vget_low_f32(vacc3x0123); in xnn_f32_ppmm_ukernel_4x8__neonfma() local 148 vst1_f32(c3, vacc3x01); c3 += 2; in xnn_f32_ppmm_ukernel_4x8__neonfma() 153 vacc3x01 = vget_high_f32(vacc3x0123); in xnn_f32_ppmm_ukernel_4x8__neonfma() 159 vst1_lane_f32(c3, vacc3x01, 0); in xnn_f32_ppmm_ukernel_4x8__neonfma()
|
D | 8x8-neon.c | 200 float32x2_t vacc3x01 = vget_low_f32(vacc3x0123); in xnn_f32_ppmm_ukernel_8x8__neon() local 209 vst1_f32(c3, vacc3x01); c3 += 2; in xnn_f32_ppmm_ukernel_8x8__neon() 218 vacc3x01 = vget_high_f32(vacc3x0123); in xnn_f32_ppmm_ukernel_8x8__neon() 228 vst1_lane_f32(c3, vacc3x01, 0); in xnn_f32_ppmm_ukernel_8x8__neon()
|
/external/XNNPACK/src/f32-gemm/gen-inc/ |
D | 4x8-neonfma-lane-ld64.c | 173 float32x2_t vacc3x01 = vget_low_f32(vacc3x0123); in xnn_f32_gemminc_ukernel_4x8__neonfma_lane_ld64() local 178 vst1_f32(c3, vacc3x01); c3 += 2; in xnn_f32_gemminc_ukernel_4x8__neonfma_lane_ld64() 183 vacc3x01 = vget_high_f32(vacc3x0123); in xnn_f32_gemminc_ukernel_4x8__neonfma_lane_ld64() 189 vst1_lane_f32(c3, vacc3x01, 0); in xnn_f32_gemminc_ukernel_4x8__neonfma_lane_ld64()
|
D | 4x8-neon-lane-ld64.c | 173 float32x2_t vacc3x01 = vget_low_f32(vacc3x0123); in xnn_f32_gemminc_ukernel_4x8__neon_lane_ld64() local 178 vst1_f32(c3, vacc3x01); c3 += 2; in xnn_f32_gemminc_ukernel_4x8__neon_lane_ld64() 183 vacc3x01 = vget_high_f32(vacc3x0123); in xnn_f32_gemminc_ukernel_4x8__neon_lane_ld64() 189 vst1_lane_f32(c3, vacc3x01, 0); in xnn_f32_gemminc_ukernel_4x8__neon_lane_ld64()
|
D | 4x8-neon-dup-ld64.c | 181 float32x2_t vacc3x01 = vget_low_f32(vacc3x0123); in xnn_f32_gemminc_ukernel_4x8__neon_dup_ld64() local 186 vst1_f32(c3, vacc3x01); c3 += 2; in xnn_f32_gemminc_ukernel_4x8__neon_dup_ld64() 191 vacc3x01 = vget_high_f32(vacc3x0123); in xnn_f32_gemminc_ukernel_4x8__neon_dup_ld64() 197 vst1_lane_f32(c3, vacc3x01, 0); in xnn_f32_gemminc_ukernel_4x8__neon_dup_ld64()
|
D | 4x8-neonfma-dup-ld64.c | 181 float32x2_t vacc3x01 = vget_low_f32(vacc3x0123); in xnn_f32_gemminc_ukernel_4x8__neonfma_dup_ld64() local 186 vst1_f32(c3, vacc3x01); c3 += 2; in xnn_f32_gemminc_ukernel_4x8__neonfma_dup_ld64() 191 vacc3x01 = vget_high_f32(vacc3x0123); in xnn_f32_gemminc_ukernel_4x8__neonfma_dup_ld64() 197 vst1_lane_f32(c3, vacc3x01, 0); in xnn_f32_gemminc_ukernel_4x8__neonfma_dup_ld64()
|
D | 5x8-neonfma-lane-ld64.c | 200 float32x2_t vacc3x01 = vget_low_f32(vacc3x0123); in xnn_f32_gemminc_ukernel_5x8__neonfma_lane_ld64() local 206 vst1_f32(c3, vacc3x01); c3 += 2; in xnn_f32_gemminc_ukernel_5x8__neonfma_lane_ld64() 212 vacc3x01 = vget_high_f32(vacc3x0123); in xnn_f32_gemminc_ukernel_5x8__neonfma_lane_ld64() 219 vst1_lane_f32(c3, vacc3x01, 0); in xnn_f32_gemminc_ukernel_5x8__neonfma_lane_ld64()
|