/external/XNNPACK/src/f32-gemm/gen/ |
D | 4x2-neonfma-lane-ld64.c | 62 float32x2_t vacc1x01 = vacc0x01; in xnn_f32_gemm_ukernel_4x2__neonfma_lane_ld64() local 77 vacc1x01 = vfma_lane_f32(vacc1x01, vb01c0, va1, 0); in xnn_f32_gemm_ukernel_4x2__neonfma_lane_ld64() 86 vacc1x01 = vfma_f32(vacc1x01, va1c0, vb01c0); in xnn_f32_gemm_ukernel_4x2__neonfma_lane_ld64() 94 vacc1x01 = vfma_lane_f32(vacc1x01, vb01c1, va1, 1); in xnn_f32_gemm_ukernel_4x2__neonfma_lane_ld64() 103 vacc1x01 = vfma_f32(vacc1x01, va1c1, vb01c1); in xnn_f32_gemm_ukernel_4x2__neonfma_lane_ld64() 117 vacc1x01 = vfma_f32(vacc1x01, va1, vb01); in xnn_f32_gemm_ukernel_4x2__neonfma_lane_ld64() 124 vacc1x01 = vmin_f32(vacc1x01, vmax); in xnn_f32_gemm_ukernel_4x2__neonfma_lane_ld64() 130 vacc1x01 = vmax_f32(vacc1x01, vmin); in xnn_f32_gemm_ukernel_4x2__neonfma_lane_ld64() 137 vst1_f32(c1, vacc1x01); in xnn_f32_gemm_ukernel_4x2__neonfma_lane_ld64() 153 vst1_lane_f32(c1, vacc1x01, 0); in xnn_f32_gemm_ukernel_4x2__neonfma_lane_ld64()
|
D | 4x2-neon-lane-ld64.c | 62 float32x2_t vacc1x01 = vacc0x01; in xnn_f32_gemm_ukernel_4x2__neon_lane_ld64() local 76 vacc1x01 = vmla_lane_f32(vacc1x01, vb01c0, va1, 0); in xnn_f32_gemm_ukernel_4x2__neon_lane_ld64() 82 vacc1x01 = vmla_lane_f32(vacc1x01, vb01c1, va1, 1); in xnn_f32_gemm_ukernel_4x2__neon_lane_ld64() 95 vacc1x01 = vmla_f32(vacc1x01, va1, vb01); in xnn_f32_gemm_ukernel_4x2__neon_lane_ld64() 102 vacc1x01 = vmin_f32(vacc1x01, vmax); in xnn_f32_gemm_ukernel_4x2__neon_lane_ld64() 108 vacc1x01 = vmax_f32(vacc1x01, vmin); in xnn_f32_gemm_ukernel_4x2__neon_lane_ld64() 115 vst1_f32(c1, vacc1x01); in xnn_f32_gemm_ukernel_4x2__neon_lane_ld64() 131 vst1_lane_f32(c1, vacc1x01, 0); in xnn_f32_gemm_ukernel_4x2__neon_lane_ld64()
|
D | 4x8-neon-lane-ld64.c | 173 float32x2_t vacc1x01 = vget_low_f32(vacc1x0123); in xnn_f32_gemm_ukernel_4x8__neon_lane_ld64() local 178 vst1_f32(c1, vacc1x01); c1 += 2; in xnn_f32_gemm_ukernel_4x8__neon_lane_ld64() 183 vacc1x01 = vget_high_f32(vacc1x0123); in xnn_f32_gemm_ukernel_4x8__neon_lane_ld64() 189 vst1_lane_f32(c1, vacc1x01, 0); in xnn_f32_gemm_ukernel_4x8__neon_lane_ld64()
|
D | 4x8-neonfma-lane-ld64.c | 173 float32x2_t vacc1x01 = vget_low_f32(vacc1x0123); in xnn_f32_gemm_ukernel_4x8__neonfma_lane_ld64() local 178 vst1_f32(c1, vacc1x01); c1 += 2; in xnn_f32_gemm_ukernel_4x8__neonfma_lane_ld64() 183 vacc1x01 = vget_high_f32(vacc1x0123); in xnn_f32_gemm_ukernel_4x8__neonfma_lane_ld64() 189 vst1_lane_f32(c1, vacc1x01, 0); in xnn_f32_gemm_ukernel_4x8__neonfma_lane_ld64()
|
D | 4x8-neon-dup-ld64.c | 181 float32x2_t vacc1x01 = vget_low_f32(vacc1x0123); in xnn_f32_gemm_ukernel_4x8__neon_dup_ld64() local 186 vst1_f32(c1, vacc1x01); c1 += 2; in xnn_f32_gemm_ukernel_4x8__neon_dup_ld64() 191 vacc1x01 = vget_high_f32(vacc1x0123); in xnn_f32_gemm_ukernel_4x8__neon_dup_ld64() 197 vst1_lane_f32(c1, vacc1x01, 0); in xnn_f32_gemm_ukernel_4x8__neon_dup_ld64()
|
D | 4x8-neonfma-dup-ld64.c | 181 float32x2_t vacc1x01 = vget_low_f32(vacc1x0123); in xnn_f32_gemm_ukernel_4x8__neonfma_dup_ld64() local 186 vst1_f32(c1, vacc1x01); c1 += 2; in xnn_f32_gemm_ukernel_4x8__neonfma_dup_ld64() 191 vacc1x01 = vget_high_f32(vacc1x0123); in xnn_f32_gemm_ukernel_4x8__neonfma_dup_ld64() 197 vst1_lane_f32(c1, vacc1x01, 0); in xnn_f32_gemm_ukernel_4x8__neonfma_dup_ld64()
|
/external/XNNPACK/src/f32-igemm/gen/ |
D | 4x2-neonfma-lane-ld64.c | 60 float32x2_t vacc1x01 = vacc0x01; in xnn_f32_igemm_ukernel_4x2__neonfma_lane_ld64() local 99 vacc1x01 = vfma_lane_f32(vacc1x01, vb01c0, va1, 0); in xnn_f32_igemm_ukernel_4x2__neonfma_lane_ld64() 108 vacc1x01 = vfma_f32(vacc1x01, va1c0, vb01c0); in xnn_f32_igemm_ukernel_4x2__neonfma_lane_ld64() 116 vacc1x01 = vfma_lane_f32(vacc1x01, vb01c1, va1, 1); in xnn_f32_igemm_ukernel_4x2__neonfma_lane_ld64() 125 vacc1x01 = vfma_f32(vacc1x01, va1c1, vb01c1); in xnn_f32_igemm_ukernel_4x2__neonfma_lane_ld64() 139 vacc1x01 = vfma_f32(vacc1x01, va1, vb01); in xnn_f32_igemm_ukernel_4x2__neonfma_lane_ld64() 148 vacc1x01 = vmin_f32(vacc1x01, vmax); in xnn_f32_igemm_ukernel_4x2__neonfma_lane_ld64() 154 vacc1x01 = vmax_f32(vacc1x01, vmin); in xnn_f32_igemm_ukernel_4x2__neonfma_lane_ld64() 163 vst1_f32(c1, vacc1x01); in xnn_f32_igemm_ukernel_4x2__neonfma_lane_ld64() 174 vst1_lane_f32(c1, vacc1x01, 0); in xnn_f32_igemm_ukernel_4x2__neonfma_lane_ld64()
|
D | 4x2-neon-lane-ld64.c | 60 float32x2_t vacc1x01 = vacc0x01; in xnn_f32_igemm_ukernel_4x2__neon_lane_ld64() local 98 vacc1x01 = vmla_lane_f32(vacc1x01, vb01c0, va1, 0); in xnn_f32_igemm_ukernel_4x2__neon_lane_ld64() 104 vacc1x01 = vmla_lane_f32(vacc1x01, vb01c1, va1, 1); in xnn_f32_igemm_ukernel_4x2__neon_lane_ld64() 117 vacc1x01 = vmla_f32(vacc1x01, va1, vb01); in xnn_f32_igemm_ukernel_4x2__neon_lane_ld64() 126 vacc1x01 = vmin_f32(vacc1x01, vmax); in xnn_f32_igemm_ukernel_4x2__neon_lane_ld64() 132 vacc1x01 = vmax_f32(vacc1x01, vmin); in xnn_f32_igemm_ukernel_4x2__neon_lane_ld64() 141 vst1_f32(c1, vacc1x01); in xnn_f32_igemm_ukernel_4x2__neon_lane_ld64() 152 vst1_lane_f32(c1, vacc1x01, 0); in xnn_f32_igemm_ukernel_4x2__neon_lane_ld64()
|
D | 4x4-neon-lane-ld64.c | 152 float32x2_t vacc1x01 = vget_low_f32(vacc1x0123); in xnn_f32_igemm_ukernel_4x4__neon_lane_ld64() local 157 vst1_f32(c1, vacc1x01); c1 += 2; in xnn_f32_igemm_ukernel_4x4__neon_lane_ld64() 162 vacc1x01 = vget_high_f32(vacc1x0123); in xnn_f32_igemm_ukernel_4x4__neon_lane_ld64() 168 vst1_lane_f32(c1, vacc1x01, 0); in xnn_f32_igemm_ukernel_4x4__neon_lane_ld64()
|
D | 4x4-neonfma-lane-ld64.c | 152 float32x2_t vacc1x01 = vget_low_f32(vacc1x0123); in xnn_f32_igemm_ukernel_4x4__neonfma_lane_ld64() local 157 vst1_f32(c1, vacc1x01); c1 += 2; in xnn_f32_igemm_ukernel_4x4__neonfma_lane_ld64() 162 vacc1x01 = vget_high_f32(vacc1x0123); in xnn_f32_igemm_ukernel_4x4__neonfma_lane_ld64() 168 vst1_lane_f32(c1, vacc1x01, 0); in xnn_f32_igemm_ukernel_4x4__neonfma_lane_ld64()
|
D | 4x8-neon-dup-ld64.c | 202 float32x2_t vacc1x01 = vget_low_f32(vacc1x0123); in xnn_f32_igemm_ukernel_4x8__neon_dup_ld64() local 207 vst1_f32(c1, vacc1x01); c1 += 2; in xnn_f32_igemm_ukernel_4x8__neon_dup_ld64() 212 vacc1x01 = vget_high_f32(vacc1x0123); in xnn_f32_igemm_ukernel_4x8__neon_dup_ld64() 218 vst1_lane_f32(c1, vacc1x01, 0); in xnn_f32_igemm_ukernel_4x8__neon_dup_ld64()
|
D | 4x8-neonfma-dup-ld64.c | 202 float32x2_t vacc1x01 = vget_low_f32(vacc1x0123); in xnn_f32_igemm_ukernel_4x8__neonfma_dup_ld64() local 207 vst1_f32(c1, vacc1x01); c1 += 2; in xnn_f32_igemm_ukernel_4x8__neonfma_dup_ld64() 212 vacc1x01 = vget_high_f32(vacc1x0123); in xnn_f32_igemm_ukernel_4x8__neonfma_dup_ld64() 218 vst1_lane_f32(c1, vacc1x01, 0); in xnn_f32_igemm_ukernel_4x8__neonfma_dup_ld64()
|
D | 4x8-neon-lane-ld64.c | 194 float32x2_t vacc1x01 = vget_low_f32(vacc1x0123); in xnn_f32_igemm_ukernel_4x8__neon_lane_ld64() local 199 vst1_f32(c1, vacc1x01); c1 += 2; in xnn_f32_igemm_ukernel_4x8__neon_lane_ld64() 204 vacc1x01 = vget_high_f32(vacc1x0123); in xnn_f32_igemm_ukernel_4x8__neon_lane_ld64() 210 vst1_lane_f32(c1, vacc1x01, 0); in xnn_f32_igemm_ukernel_4x8__neon_lane_ld64()
|
/external/XNNPACK/src/f32-vmulcaddc/gen/ |
D | c4-neonfma-2x.c | 89 float32x2_t vacc1x01 = vget_low_f32(vacc1x0123); in xnn_f32_vmulcaddc_ukernel_c4__neonfma_2x() local 92 vst1_f32(o1, vacc1x01); o1 += 2; in xnn_f32_vmulcaddc_ukernel_c4__neonfma_2x() 95 vacc1x01 = vget_high_f32(vacc1x0123); in xnn_f32_vmulcaddc_ukernel_c4__neonfma_2x() 99 vst1_lane_f32(o1, vacc1x01, 0); o1 += 1; in xnn_f32_vmulcaddc_ukernel_c4__neonfma_2x()
|
D | c4-neon-2x.c | 93 float32x2_t vacc1x01 = vget_low_f32(vacc1x0123); in xnn_f32_vmulcaddc_ukernel_c4__neon_2x() local 96 vst1_f32(o1, vacc1x01); o1 += 2; in xnn_f32_vmulcaddc_ukernel_c4__neon_2x() 99 vacc1x01 = vget_high_f32(vacc1x0123); in xnn_f32_vmulcaddc_ukernel_c4__neon_2x() 103 vst1_lane_f32(o1, vacc1x01, 0); o1 += 1; in xnn_f32_vmulcaddc_ukernel_c4__neon_2x()
|
D | c8-neonfma-2x.c | 122 float32x2_t vacc1x01 = vget_low_f32(vacc1x0123); in xnn_f32_vmulcaddc_ukernel_c8__neonfma_2x() local 125 vst1_f32(o1, vacc1x01); o1 += 2; in xnn_f32_vmulcaddc_ukernel_c8__neonfma_2x() 128 vacc1x01 = vget_high_f32(vacc1x0123); in xnn_f32_vmulcaddc_ukernel_c8__neonfma_2x() 132 vst1_lane_f32(o1, vacc1x01, 0); o1 += 1; in xnn_f32_vmulcaddc_ukernel_c8__neonfma_2x()
|
D | c8-neon-2x.c | 130 float32x2_t vacc1x01 = vget_low_f32(vacc1x0123); in xnn_f32_vmulcaddc_ukernel_c8__neon_2x() local 133 vst1_f32(o1, vacc1x01); o1 += 2; in xnn_f32_vmulcaddc_ukernel_c8__neon_2x() 136 vacc1x01 = vget_high_f32(vacc1x0123); in xnn_f32_vmulcaddc_ukernel_c8__neon_2x() 140 vst1_lane_f32(o1, vacc1x01, 0); o1 += 1; in xnn_f32_vmulcaddc_ukernel_c8__neon_2x()
|
/external/XNNPACK/src/f32-prelu/gen/ |
D | neon-2x4.c | 95 float32x2_t vacc1x01 = vget_low_f32(vacc1x0123); in xnn_f32_prelu_ukernel__neon_2x4() local 98 vst1_f32(o1, vacc1x01); o1 += 2; in xnn_f32_prelu_ukernel__neon_2x4() 101 vacc1x01 = vget_high_f32(vacc1x0123); in xnn_f32_prelu_ukernel__neon_2x4() 105 vst1_lane_f32(o1, vacc1x01, 0); o1 += 1; in xnn_f32_prelu_ukernel__neon_2x4()
|
D | neon-2x8.c | 135 float32x2_t vacc1x01 = vget_low_f32(vacc1x0123); in xnn_f32_prelu_ukernel__neon_2x8() local 138 vst1_f32(o1, vacc1x01); o1 += 2; in xnn_f32_prelu_ukernel__neon_2x8() 141 vacc1x01 = vget_high_f32(vacc1x0123); in xnn_f32_prelu_ukernel__neon_2x8() 145 vst1_lane_f32(o1, vacc1x01, 0); o1 += 1; in xnn_f32_prelu_ukernel__neon_2x8()
|
/external/XNNPACK/src/f32-ppmm/gen/ |
D | 4x8-neon.c | 129 float32x2_t vacc1x01 = vget_low_f32(vacc1x0123); in xnn_f32_ppmm_ukernel_4x8__neon() local 134 vst1_f32(c1, vacc1x01); c1 += 2; in xnn_f32_ppmm_ukernel_4x8__neon() 139 vacc1x01 = vget_high_f32(vacc1x0123); in xnn_f32_ppmm_ukernel_4x8__neon() 145 vst1_lane_f32(c1, vacc1x01, 0); in xnn_f32_ppmm_ukernel_4x8__neon()
|
D | 4x8-neonfma.c | 145 float32x2_t vacc1x01 = vget_low_f32(vacc1x0123); in xnn_f32_ppmm_ukernel_4x8__neonfma() local 150 vst1_f32(c1, vacc1x01); c1 += 2; in xnn_f32_ppmm_ukernel_4x8__neonfma() 155 vacc1x01 = vget_high_f32(vacc1x0123); in xnn_f32_ppmm_ukernel_4x8__neonfma() 161 vst1_lane_f32(c1, vacc1x01, 0); in xnn_f32_ppmm_ukernel_4x8__neonfma()
|
/external/XNNPACK/src/f32-gemm/gen-inc/ |
D | 4x8-neonfma-lane-ld64.c | 175 float32x2_t vacc1x01 = vget_low_f32(vacc1x0123); in xnn_f32_gemminc_ukernel_4x8__neonfma_lane_ld64() local 180 vst1_f32(c1, vacc1x01); c1 += 2; in xnn_f32_gemminc_ukernel_4x8__neonfma_lane_ld64() 185 vacc1x01 = vget_high_f32(vacc1x0123); in xnn_f32_gemminc_ukernel_4x8__neonfma_lane_ld64() 191 vst1_lane_f32(c1, vacc1x01, 0); in xnn_f32_gemminc_ukernel_4x8__neonfma_lane_ld64()
|
D | 4x8-neon-lane-ld64.c | 175 float32x2_t vacc1x01 = vget_low_f32(vacc1x0123); in xnn_f32_gemminc_ukernel_4x8__neon_lane_ld64() local 180 vst1_f32(c1, vacc1x01); c1 += 2; in xnn_f32_gemminc_ukernel_4x8__neon_lane_ld64() 185 vacc1x01 = vget_high_f32(vacc1x0123); in xnn_f32_gemminc_ukernel_4x8__neon_lane_ld64() 191 vst1_lane_f32(c1, vacc1x01, 0); in xnn_f32_gemminc_ukernel_4x8__neon_lane_ld64()
|
D | 4x8-neon-dup-ld64.c | 183 float32x2_t vacc1x01 = vget_low_f32(vacc1x0123); in xnn_f32_gemminc_ukernel_4x8__neon_dup_ld64() local 188 vst1_f32(c1, vacc1x01); c1 += 2; in xnn_f32_gemminc_ukernel_4x8__neon_dup_ld64() 193 vacc1x01 = vget_high_f32(vacc1x0123); in xnn_f32_gemminc_ukernel_4x8__neon_dup_ld64() 199 vst1_lane_f32(c1, vacc1x01, 0); in xnn_f32_gemminc_ukernel_4x8__neon_dup_ld64()
|
D | 4x8-neonfma-dup-ld64.c | 183 float32x2_t vacc1x01 = vget_low_f32(vacc1x0123); in xnn_f32_gemminc_ukernel_4x8__neonfma_dup_ld64() local 188 vst1_f32(c1, vacc1x01); c1 += 2; in xnn_f32_gemminc_ukernel_4x8__neonfma_dup_ld64() 193 vacc1x01 = vget_high_f32(vacc1x0123); in xnn_f32_gemminc_ukernel_4x8__neonfma_dup_ld64() 199 vst1_lane_f32(c1, vacc1x01, 0); in xnn_f32_gemminc_ukernel_4x8__neonfma_dup_ld64()
|