/external/XNNPACK/src/f32-gemm/gen/
D | 4x2-minmax-neonfma-lane-ld64.c | in xnn_f32_gemm_minmax_ukernel_4x2__neonfma_lane_ld64():
     62  float32x2_t vacc1x01 = vacc0x01;  (local)
     77  vacc1x01 = vfma_lane_f32(vacc1x01, vb01c0, va1, 0);
     86  vacc1x01 = vfma_f32(vacc1x01, va1c0, vb01c0);
     94  vacc1x01 = vfma_lane_f32(vacc1x01, vb01c1, va1, 1);
    103  vacc1x01 = vfma_f32(vacc1x01, va1c1, vb01c1);
    117  vacc1x01 = vfma_f32(vacc1x01, va1, vb01);
    124  vacc1x01 = vmin_f32(vacc1x01, vmax);
    130  vacc1x01 = vmax_f32(vacc1x01, vmin);
    137  vst1_f32(c1, vacc1x01);
    153  vst1_lane_f32(c1, vacc1x01, 0);
D | 4x2-minmax-neon-lane-ld64.c | in xnn_f32_gemm_minmax_ukernel_4x2__neon_lane_ld64():
     62  float32x2_t vacc1x01 = vacc0x01;  (local)
     76  vacc1x01 = vmla_lane_f32(vacc1x01, vb01c0, va1, 0);
     82  vacc1x01 = vmla_lane_f32(vacc1x01, vb01c1, va1, 1);
     95  vacc1x01 = vmla_f32(vacc1x01, va1, vb01);
    102  vacc1x01 = vmin_f32(vacc1x01, vmax);
    108  vacc1x01 = vmax_f32(vacc1x01, vmin);
    115  vst1_f32(c1, vacc1x01);
    131  vst1_lane_f32(c1, vacc1x01, 0);
D | 4x8-minmax-neon-lane-ld64.c | in xnn_f32_gemm_minmax_ukernel_4x8__neon_lane_ld64():
    173  float32x2_t vacc1x01 = vget_low_f32(vacc1x0123);  (local)
    178  vst1_f32(c1, vacc1x01); c1 += 2;
    183  vacc1x01 = vget_high_f32(vacc1x0123);
    189  vst1_lane_f32(c1, vacc1x01, 0);
D | 4x8-minmax-neonfma-lane-ld64.c | in xnn_f32_gemm_minmax_ukernel_4x8__neonfma_lane_ld64():
    173  float32x2_t vacc1x01 = vget_low_f32(vacc1x0123);  (local)
    178  vst1_f32(c1, vacc1x01); c1 += 2;
    183  vacc1x01 = vget_high_f32(vacc1x0123);
    189  vst1_lane_f32(c1, vacc1x01, 0);
D | 4x8-minmax-neonfma-dup-ld64.c | in xnn_f32_gemm_minmax_ukernel_4x8__neonfma_dup_ld64():
    181  float32x2_t vacc1x01 = vget_low_f32(vacc1x0123);  (local)
    186  vst1_f32(c1, vacc1x01); c1 += 2;
    191  vacc1x01 = vget_high_f32(vacc1x0123);
    197  vst1_lane_f32(c1, vacc1x01, 0);
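Taken together, the 4x2 occurrences above trace the whole life of vacc1x01, the row-1, columns-0..1 accumulator: it starts as a copy of the bias-initialized row-0 accumulator, gains one fused multiply-add per k step (lane forms in the unrolled two-at-a-time "ld64" body, a plain vector FMA for the odd-k tail), is clamped against the minmax parameters, and is stored whole or lane-by-lane depending on the remaining output width. A minimal single-row sketch of that pattern follows; it is the NEON-FMA flavor (the plain NEON files use vmla_lane_f32/vmla_f32), and all names are illustrative, not XNNPACK's:

#include <arm_neon.h>
#include <stddef.h>

void gemm_1x2_minmax_lane_ld64_sketch(
    size_t kc,              // K extent, in floats
    const float* a,         // one row of A
    const float* w,         // packed B: 2 bias floats, then 2 floats per k
    float* c,               // one output row, 2 floats wide
    float min, float max)   // minmax clamp parameters
{
  float32x2_t vacc0x01 = vld1_f32(w); w += 2;  // bias init; rows 1..3 of the
                                               // 4x2 kernels copy this (line 62)
  size_t k = kc;
  for (; k >= 2; k -= 2) {
    const float32x2_t va0 = vld1_f32(a); a += 2;     // "ld64": two A elements
    const float32x2_t vb01c0 = vld1_f32(w); w += 2;  // B cols 0..1 at k+0
    const float32x2_t vb01c1 = vld1_f32(w); w += 2;  // B cols 0..1 at k+1
    // Lane-FMA form; the vfma_f32(acc, va1c0, vb01c0) shape in the same
    // files is the dup-based alternative kept behind preprocessor checks.
    vacc0x01 = vfma_lane_f32(vacc0x01, vb01c0, va0, 0);
    vacc0x01 = vfma_lane_f32(vacc0x01, vb01c1, va0, 1);
  }
  if (k != 0) {  // odd-K tail: broadcast the last A element, one vector FMA
    const float32x2_t va0 = vld1_dup_f32(a);
    const float32x2_t vb01 = vld1_f32(w); w += 2;
    vacc0x01 = vfma_f32(vacc0x01, va0, vb01);
  }
  vacc0x01 = vmin_f32(vacc0x01, vdup_n_f32(max));  // clamp high
  vacc0x01 = vmax_f32(vacc0x01, vdup_n_f32(min));  // clamp low
  vst1_f32(c, vacc0x01);  // nc < 2 would fall back to vst1_lane_f32
}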
/external/XNNPACK/src/f32-igemm/gen/
D | 4x2-minmax-neonfma-lane-ld64.c | in xnn_f32_igemm_minmax_ukernel_4x2__neonfma_lane_ld64():
     60  float32x2_t vacc1x01 = vacc0x01;  (local)
     99  vacc1x01 = vfma_lane_f32(vacc1x01, vb01c0, va1, 0);
    108  vacc1x01 = vfma_f32(vacc1x01, va1c0, vb01c0);
    116  vacc1x01 = vfma_lane_f32(vacc1x01, vb01c1, va1, 1);
    125  vacc1x01 = vfma_f32(vacc1x01, va1c1, vb01c1);
    139  vacc1x01 = vfma_f32(vacc1x01, va1, vb01);
    148  vacc1x01 = vmin_f32(vacc1x01, vmax);
    154  vacc1x01 = vmax_f32(vacc1x01, vmin);
    163  vst1_f32(c1, vacc1x01);
    174  vst1_lane_f32(c1, vacc1x01, 0);
D | 4x2-minmax-neon-lane-ld64.c | in xnn_f32_igemm_minmax_ukernel_4x2__neon_lane_ld64():
     60  float32x2_t vacc1x01 = vacc0x01;  (local)
     98  vacc1x01 = vmla_lane_f32(vacc1x01, vb01c0, va1, 0);
    104  vacc1x01 = vmla_lane_f32(vacc1x01, vb01c1, va1, 1);
    117  vacc1x01 = vmla_f32(vacc1x01, va1, vb01);
    126  vacc1x01 = vmin_f32(vacc1x01, vmax);
    132  vacc1x01 = vmax_f32(vacc1x01, vmin);
    141  vst1_f32(c1, vacc1x01);
    152  vst1_lane_f32(c1, vacc1x01, 0);
D | 4x4-minmax-neon-lane-ld64.c | in xnn_f32_igemm_minmax_ukernel_4x4__neon_lane_ld64():
    152  float32x2_t vacc1x01 = vget_low_f32(vacc1x0123);  (local)
    157  vst1_f32(c1, vacc1x01); c1 += 2;
    162  vacc1x01 = vget_high_f32(vacc1x0123);
    168  vst1_lane_f32(c1, vacc1x01, 0);
D | 4x4-minmax-neonfma-lane-ld64.c | in xnn_f32_igemm_minmax_ukernel_4x4__neonfma_lane_ld64():
    152  float32x2_t vacc1x01 = vget_low_f32(vacc1x0123);  (local)
    157  vst1_f32(c1, vacc1x01); c1 += 2;
    162  vacc1x01 = vget_high_f32(vacc1x0123);
    168  vst1_lane_f32(c1, vacc1x01, 0);
D | 4x8-minmax-neon-dup-ld64.c | in xnn_f32_igemm_minmax_ukernel_4x8__neon_dup_ld64():
    202  float32x2_t vacc1x01 = vget_low_f32(vacc1x0123);  (local)
    207  vst1_f32(c1, vacc1x01); c1 += 2;
    212  vacc1x01 = vget_high_f32(vacc1x0123);
    218  vst1_lane_f32(c1, vacc1x01, 0);
D | 4x8-minmax-neonfma-dup-ld64.c | in xnn_f32_igemm_minmax_ukernel_4x8__neonfma_dup_ld64():
    202  float32x2_t vacc1x01 = vget_low_f32(vacc1x0123);  (local)
    207  vst1_f32(c1, vacc1x01); c1 += 2;
    212  vacc1x01 = vget_high_f32(vacc1x0123);
    218  vst1_lane_f32(c1, vacc1x01, 0);
D | 4x8-minmax-neon-lane-ld64.c | in xnn_f32_igemm_minmax_ukernel_4x8__neon_lane_ld64():
    194  float32x2_t vacc1x01 = vget_low_f32(vacc1x0123);  (local)
    199  vst1_f32(c1, vacc1x01); c1 += 2;
    204  vacc1x01 = vget_high_f32(vacc1x0123);
    210  vst1_lane_f32(c1, vacc1x01, 0);
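Every 4x4/4x8 row listed above ends with the same width-remainder cascade: take the low half of the 4-wide accumulator, store two lanes if two or more columns remain, shift the high half down, and store a single lane for an odd column. A self-contained sketch of that cascade for one row, with illustrative names:

#include <arm_neon.h>
#include <stddef.h>

// Store the last nc (0 < nc < 4) output floats of a row accumulator.
static void store_row_tail_sketch(float* c, float32x4_t vacc0123, size_t nc) {
  float32x2_t vacc01 = vget_low_f32(vacc0123);  // lanes 0..1
  if (nc & 2) {
    vst1_f32(c, vacc01); c += 2;                // two-lane store
    vacc01 = vget_high_f32(vacc0123);           // lanes 2..3 move down
  }
  if (nc & 1) {
    vst1_lane_f32(c, vacc01, 0);                // final single lane
  }
}

The vacc1x01 lines above are exactly this cascade instantiated for row 1; the 8-wide kernels reach it after first peeling off a 4-wide vst1q_f32 store when nc & 4.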
/external/XNNPACK/src/f32-prelu/gen/
D | neon-2x4.c | in xnn_f32_prelu_ukernel__neon_2x4():
     80  float32x2_t vacc1x01 = vget_low_f32(vacc1x0123);  (local)
     83  vst1_f32(o1, vacc1x01); o1 += 2;
     86  vacc1x01 = vget_high_f32(vacc1x0123);
     90  vst1_lane_f32(o1, vacc1x01, 0); o1 += 1;
D | neon-2x8.c | in xnn_f32_prelu_ukernel__neon_2x8():
    110  float32x2_t vacc1x01 = vget_low_f32(vacc1x0123);  (local)
    113  vst1_f32(o1, vacc1x01); o1 += 2;
    116  vacc1x01 = vget_high_f32(vacc1x0123);
    120  vst1_lane_f32(o1, vacc1x01, 0); o1 += 1;
D | neon-4x4.c | in xnn_f32_prelu_ukernel__neon_4x4():
    112  float32x2_t vacc1x01 = vget_low_f32(vacc1x0123);  (local)
    117  vst1_f32(o1, vacc1x01); o1 += 2;
    122  vacc1x01 = vget_high_f32(vacc1x0123);
    128  vst1_lane_f32(o1, vacc1x01, 0); o1 += 1;
D | neon-2x16.c | in xnn_f32_prelu_ukernel__neon_2x16():
    132  float32x2_t vacc1x01 = vget_low_f32(vacc1x0123);  (local)
    135  vst1_f32(o1, vacc1x01); o1 += 2;
    138  vacc1x01 = vget_high_f32(vacc1x0123);
    142  vst1_lane_f32(o1, vacc1x01, 0); o1 += 1;
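The PReLU kernels appear in this listing only through the same two-then-one store cascade; the vector being stored is produced earlier in each file by the standard NEON select pattern, sketched here under that assumption (illustrative names):

#include <arm_neon.h>

// PReLU on one 4-wide vector: lanes with a set sign bit (negative x,
// including -0.0f) take x * slope, the rest pass x through unchanged.
static float32x4_t prelu_q_sketch(float32x4_t vx, float32x4_t vslope) {
  const float32x4_t vproduct = vmulq_f32(vx, vslope);
  const uint32x4_t vmask =
      vcltq_s32(vreinterpretq_s32_f32(vx), vdupq_n_s32(0));  // sign-bit test
  return vbslq_f32(vmask, vproduct, vx);                     // per-lane select
}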
/external/XNNPACK/src/f32-vmulcaddc/gen/
D | c4-minmax-neonfma-2x.c | in xnn_f32_vmulcaddc_minmax_ukernel_c4__neonfma_2x():
     89  float32x2_t vacc1x01 = vget_low_f32(vacc1x0123);  (local)
     92  vst1_f32(o1, vacc1x01); o1 += 2;
     95  vacc1x01 = vget_high_f32(vacc1x0123);
     99  vst1_lane_f32(o1, vacc1x01, 0); o1 += 1;
D | c4-minmax-neon-2x.c | in xnn_f32_vmulcaddc_minmax_ukernel_c4__neon_2x():
     93  float32x2_t vacc1x01 = vget_low_f32(vacc1x0123);  (local)
     96  vst1_f32(o1, vacc1x01); o1 += 2;
     99  vacc1x01 = vget_high_f32(vacc1x0123);
    103  vst1_lane_f32(o1, vacc1x01, 0); o1 += 1;
D | c8-minmax-neonfma-2x.c | in xnn_f32_vmulcaddc_minmax_ukernel_c8__neonfma_2x():
    122  float32x2_t vacc1x01 = vget_low_f32(vacc1x0123);  (local)
    125  vst1_f32(o1, vacc1x01); o1 += 2;
    128  vacc1x01 = vget_high_f32(vacc1x0123);
    132  vst1_lane_f32(o1, vacc1x01, 0); o1 += 1;
D | c8-minmax-neon-2x.c | in xnn_f32_vmulcaddc_minmax_ukernel_c8__neon_2x():
    130  float32x2_t vacc1x01 = vget_low_f32(vacc1x0123);  (local)
    133  vst1_f32(o1, vacc1x01); o1 += 2;
    136  vacc1x01 = vget_high_f32(vacc1x0123);
    140  vst1_lane_f32(o1, vacc1x01, 0); o1 += 1;
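The vmulcaddc kernels compute y = x * scale + bias per channel, clamp, and then store through the cascade listed above. A sketch of the per-vector step in the NEON-FMA flavor (the plain NEON files would use vmlaq_f32); names are illustrative:

#include <arm_neon.h>

static float32x4_t vmulcaddc_q_sketch(
    float32x4_t vx, float32x4_t vscale, float32x4_t vbias,
    float32x4_t vmin, float32x4_t vmax) {
  float32x4_t vacc = vfmaq_f32(vbias, vx, vscale);  // bias + x * scale
  vacc = vmaxq_f32(vacc, vmin);                     // clamp low
  vacc = vminq_f32(vacc, vmax);                     // clamp high
  return vacc;
}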
/external/XNNPACK/src/f32-ppmm/gen/
D | 4x8-minmax-neon.c | in xnn_f32_ppmm_minmax_ukernel_4x8__neon():
    129  float32x2_t vacc1x01 = vget_low_f32(vacc1x0123);  (local)
    134  vst1_f32(c1, vacc1x01); c1 += 2;
    139  vacc1x01 = vget_high_f32(vacc1x0123);
    145  vst1_lane_f32(c1, vacc1x01, 0);
D | 4x8-minmax-neonfma.c | in xnn_f32_ppmm_minmax_ukernel_4x8__neonfma():
    145  float32x2_t vacc1x01 = vget_low_f32(vacc1x0123);  (local)
    150  vst1_f32(c1, vacc1x01); c1 += 2;
    155  vacc1x01 = vget_high_f32(vacc1x0123);
    161  vst1_lane_f32(c1, vacc1x01, 0);
/external/XNNPACK/src/qs8-gemm/gen/
D | 2x8c8-xw-minmax-avx2.c | in xnn_qs8_gemm_xw_minmax_ukernel_2x8c8__avx2():
     63  __m256i vacc1x01 = vacc0x01;  (local)
     81  vacc1x01 = _mm256_add_epi32(vacc1x01, _mm256_madd_epi16(vxa1, vxb01));
    101  const __m256i vacc1x0213 = _mm256_hadd_epi32(vacc1x01, vacc1x23);
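In the AVX2 QS8 kernel, vacc1x01 holds eight int32 partial sums shared by output columns 0 and 1 of row 1. Line 81 is the per-k-group accumulation; line 101 folds neighbouring partial sums with _mm256_hadd_epi32, whose within-lane interleaving of its two operands is what the vacc1x0213 result name records. A sketch of the accumulation step, assuming (as the "xw" in the filename suggests) weights pre-widened to int16 and an A vector already sign-extended and repeated across both 128-bit lanes; names are illustrative:

#include <immintrin.h>

// One c8 accumulation step: int16 products of the row's A values against
// the column-0 (low 128 bits) and column-1 (high 128 bits) weights, with
// adjacent pairs of products summed into int32 lanes.
static __m256i qs8_xw_acc_step_sketch(__m256i vacc01, __m256i vxa, __m256i vxb01) {
  return _mm256_add_epi32(vacc01, _mm256_madd_epi16(vxa, vxb01));
}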
/external/XNNPACK/src/f32-gemm/gen-inc/
D | 4x8inc-minmax-neonfma-lane-ld64.c | in xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_lane_ld64():
    175  float32x2_t vacc1x01 = vget_low_f32(vacc1x0123);  (local)
    180  vst1_f32(c1, vacc1x01); c1 += 2;
    185  vacc1x01 = vget_high_f32(vacc1x0123);
    191  vst1_lane_f32(c1, vacc1x01, 0);
D | 4x8inc-minmax-neon-dup-ld64.c | in xnn_f32_gemminc_minmax_ukernel_4x8__neon_dup_ld64():
    183  float32x2_t vacc1x01 = vget_low_f32(vacc1x0123);  (local)
    188  vst1_f32(c1, vacc1x01); c1 += 2;
    193  vacc1x01 = vget_high_f32(vacc1x0123);
    199  vst1_lane_f32(c1, vacc1x01, 0);