Home
last modified time | relevance | path

Searched refs:vacc1x01 (Results 1 – 25 of 136) sorted by relevance

123456

/external/XNNPACK/src/f32-gemm/gen/
D4x2-minmax-neonfma-lane-ld64.c62 float32x2_t vacc1x01 = vacc0x01; in xnn_f32_gemm_minmax_ukernel_4x2__neonfma_lane_ld64() local
77 vacc1x01 = vfma_lane_f32(vacc1x01, vb01c0, va1, 0); in xnn_f32_gemm_minmax_ukernel_4x2__neonfma_lane_ld64()
86 vacc1x01 = vfma_f32(vacc1x01, va1c0, vb01c0); in xnn_f32_gemm_minmax_ukernel_4x2__neonfma_lane_ld64()
94 vacc1x01 = vfma_lane_f32(vacc1x01, vb01c1, va1, 1); in xnn_f32_gemm_minmax_ukernel_4x2__neonfma_lane_ld64()
103 vacc1x01 = vfma_f32(vacc1x01, va1c1, vb01c1); in xnn_f32_gemm_minmax_ukernel_4x2__neonfma_lane_ld64()
117 vacc1x01 = vfma_f32(vacc1x01, va1, vb01); in xnn_f32_gemm_minmax_ukernel_4x2__neonfma_lane_ld64()
124 vacc1x01 = vmin_f32(vacc1x01, vmax); in xnn_f32_gemm_minmax_ukernel_4x2__neonfma_lane_ld64()
130 vacc1x01 = vmax_f32(vacc1x01, vmin); in xnn_f32_gemm_minmax_ukernel_4x2__neonfma_lane_ld64()
137 vst1_f32(c1, vacc1x01); in xnn_f32_gemm_minmax_ukernel_4x2__neonfma_lane_ld64()
153 vst1_lane_f32(c1, vacc1x01, 0); in xnn_f32_gemm_minmax_ukernel_4x2__neonfma_lane_ld64()
D4x2-minmax-neon-lane-ld64.c62 float32x2_t vacc1x01 = vacc0x01; in xnn_f32_gemm_minmax_ukernel_4x2__neon_lane_ld64() local
76 vacc1x01 = vmla_lane_f32(vacc1x01, vb01c0, va1, 0); in xnn_f32_gemm_minmax_ukernel_4x2__neon_lane_ld64()
82 vacc1x01 = vmla_lane_f32(vacc1x01, vb01c1, va1, 1); in xnn_f32_gemm_minmax_ukernel_4x2__neon_lane_ld64()
95 vacc1x01 = vmla_f32(vacc1x01, va1, vb01); in xnn_f32_gemm_minmax_ukernel_4x2__neon_lane_ld64()
102 vacc1x01 = vmin_f32(vacc1x01, vmax); in xnn_f32_gemm_minmax_ukernel_4x2__neon_lane_ld64()
108 vacc1x01 = vmax_f32(vacc1x01, vmin); in xnn_f32_gemm_minmax_ukernel_4x2__neon_lane_ld64()
115 vst1_f32(c1, vacc1x01); in xnn_f32_gemm_minmax_ukernel_4x2__neon_lane_ld64()
131 vst1_lane_f32(c1, vacc1x01, 0); in xnn_f32_gemm_minmax_ukernel_4x2__neon_lane_ld64()
D4x8-minmax-neon-lane-ld64.c173 float32x2_t vacc1x01 = vget_low_f32(vacc1x0123); in xnn_f32_gemm_minmax_ukernel_4x8__neon_lane_ld64() local
178 vst1_f32(c1, vacc1x01); c1 += 2; in xnn_f32_gemm_minmax_ukernel_4x8__neon_lane_ld64()
183 vacc1x01 = vget_high_f32(vacc1x0123); in xnn_f32_gemm_minmax_ukernel_4x8__neon_lane_ld64()
189 vst1_lane_f32(c1, vacc1x01, 0); in xnn_f32_gemm_minmax_ukernel_4x8__neon_lane_ld64()
D4x8-minmax-neonfma-lane-ld64.c173 float32x2_t vacc1x01 = vget_low_f32(vacc1x0123); in xnn_f32_gemm_minmax_ukernel_4x8__neonfma_lane_ld64() local
178 vst1_f32(c1, vacc1x01); c1 += 2; in xnn_f32_gemm_minmax_ukernel_4x8__neonfma_lane_ld64()
183 vacc1x01 = vget_high_f32(vacc1x0123); in xnn_f32_gemm_minmax_ukernel_4x8__neonfma_lane_ld64()
189 vst1_lane_f32(c1, vacc1x01, 0); in xnn_f32_gemm_minmax_ukernel_4x8__neonfma_lane_ld64()
D4x8-minmax-neonfma-dup-ld64.c181 float32x2_t vacc1x01 = vget_low_f32(vacc1x0123); in xnn_f32_gemm_minmax_ukernel_4x8__neonfma_dup_ld64() local
186 vst1_f32(c1, vacc1x01); c1 += 2; in xnn_f32_gemm_minmax_ukernel_4x8__neonfma_dup_ld64()
191 vacc1x01 = vget_high_f32(vacc1x0123); in xnn_f32_gemm_minmax_ukernel_4x8__neonfma_dup_ld64()
197 vst1_lane_f32(c1, vacc1x01, 0); in xnn_f32_gemm_minmax_ukernel_4x8__neonfma_dup_ld64()
/external/XNNPACK/src/f32-igemm/gen/
D4x2-minmax-neonfma-lane-ld64.c60 float32x2_t vacc1x01 = vacc0x01; in xnn_f32_igemm_minmax_ukernel_4x2__neonfma_lane_ld64() local
99 vacc1x01 = vfma_lane_f32(vacc1x01, vb01c0, va1, 0); in xnn_f32_igemm_minmax_ukernel_4x2__neonfma_lane_ld64()
108 vacc1x01 = vfma_f32(vacc1x01, va1c0, vb01c0); in xnn_f32_igemm_minmax_ukernel_4x2__neonfma_lane_ld64()
116 vacc1x01 = vfma_lane_f32(vacc1x01, vb01c1, va1, 1); in xnn_f32_igemm_minmax_ukernel_4x2__neonfma_lane_ld64()
125 vacc1x01 = vfma_f32(vacc1x01, va1c1, vb01c1); in xnn_f32_igemm_minmax_ukernel_4x2__neonfma_lane_ld64()
139 vacc1x01 = vfma_f32(vacc1x01, va1, vb01); in xnn_f32_igemm_minmax_ukernel_4x2__neonfma_lane_ld64()
148 vacc1x01 = vmin_f32(vacc1x01, vmax); in xnn_f32_igemm_minmax_ukernel_4x2__neonfma_lane_ld64()
154 vacc1x01 = vmax_f32(vacc1x01, vmin); in xnn_f32_igemm_minmax_ukernel_4x2__neonfma_lane_ld64()
163 vst1_f32(c1, vacc1x01); in xnn_f32_igemm_minmax_ukernel_4x2__neonfma_lane_ld64()
174 vst1_lane_f32(c1, vacc1x01, 0); in xnn_f32_igemm_minmax_ukernel_4x2__neonfma_lane_ld64()
D4x2-minmax-neon-lane-ld64.c60 float32x2_t vacc1x01 = vacc0x01; in xnn_f32_igemm_minmax_ukernel_4x2__neon_lane_ld64() local
98 vacc1x01 = vmla_lane_f32(vacc1x01, vb01c0, va1, 0); in xnn_f32_igemm_minmax_ukernel_4x2__neon_lane_ld64()
104 vacc1x01 = vmla_lane_f32(vacc1x01, vb01c1, va1, 1); in xnn_f32_igemm_minmax_ukernel_4x2__neon_lane_ld64()
117 vacc1x01 = vmla_f32(vacc1x01, va1, vb01); in xnn_f32_igemm_minmax_ukernel_4x2__neon_lane_ld64()
126 vacc1x01 = vmin_f32(vacc1x01, vmax); in xnn_f32_igemm_minmax_ukernel_4x2__neon_lane_ld64()
132 vacc1x01 = vmax_f32(vacc1x01, vmin); in xnn_f32_igemm_minmax_ukernel_4x2__neon_lane_ld64()
141 vst1_f32(c1, vacc1x01); in xnn_f32_igemm_minmax_ukernel_4x2__neon_lane_ld64()
152 vst1_lane_f32(c1, vacc1x01, 0); in xnn_f32_igemm_minmax_ukernel_4x2__neon_lane_ld64()
D4x4-minmax-neon-lane-ld64.c152 float32x2_t vacc1x01 = vget_low_f32(vacc1x0123); in xnn_f32_igemm_minmax_ukernel_4x4__neon_lane_ld64() local
157 vst1_f32(c1, vacc1x01); c1 += 2; in xnn_f32_igemm_minmax_ukernel_4x4__neon_lane_ld64()
162 vacc1x01 = vget_high_f32(vacc1x0123); in xnn_f32_igemm_minmax_ukernel_4x4__neon_lane_ld64()
168 vst1_lane_f32(c1, vacc1x01, 0); in xnn_f32_igemm_minmax_ukernel_4x4__neon_lane_ld64()
D4x4-minmax-neonfma-lane-ld64.c152 float32x2_t vacc1x01 = vget_low_f32(vacc1x0123); in xnn_f32_igemm_minmax_ukernel_4x4__neonfma_lane_ld64() local
157 vst1_f32(c1, vacc1x01); c1 += 2; in xnn_f32_igemm_minmax_ukernel_4x4__neonfma_lane_ld64()
162 vacc1x01 = vget_high_f32(vacc1x0123); in xnn_f32_igemm_minmax_ukernel_4x4__neonfma_lane_ld64()
168 vst1_lane_f32(c1, vacc1x01, 0); in xnn_f32_igemm_minmax_ukernel_4x4__neonfma_lane_ld64()
D4x8-minmax-neon-dup-ld64.c202 float32x2_t vacc1x01 = vget_low_f32(vacc1x0123); in xnn_f32_igemm_minmax_ukernel_4x8__neon_dup_ld64() local
207 vst1_f32(c1, vacc1x01); c1 += 2; in xnn_f32_igemm_minmax_ukernel_4x8__neon_dup_ld64()
212 vacc1x01 = vget_high_f32(vacc1x0123); in xnn_f32_igemm_minmax_ukernel_4x8__neon_dup_ld64()
218 vst1_lane_f32(c1, vacc1x01, 0); in xnn_f32_igemm_minmax_ukernel_4x8__neon_dup_ld64()
D4x8-minmax-neonfma-dup-ld64.c202 float32x2_t vacc1x01 = vget_low_f32(vacc1x0123); in xnn_f32_igemm_minmax_ukernel_4x8__neonfma_dup_ld64() local
207 vst1_f32(c1, vacc1x01); c1 += 2; in xnn_f32_igemm_minmax_ukernel_4x8__neonfma_dup_ld64()
212 vacc1x01 = vget_high_f32(vacc1x0123); in xnn_f32_igemm_minmax_ukernel_4x8__neonfma_dup_ld64()
218 vst1_lane_f32(c1, vacc1x01, 0); in xnn_f32_igemm_minmax_ukernel_4x8__neonfma_dup_ld64()
D4x8-minmax-neon-lane-ld64.c194 float32x2_t vacc1x01 = vget_low_f32(vacc1x0123); in xnn_f32_igemm_minmax_ukernel_4x8__neon_lane_ld64() local
199 vst1_f32(c1, vacc1x01); c1 += 2; in xnn_f32_igemm_minmax_ukernel_4x8__neon_lane_ld64()
204 vacc1x01 = vget_high_f32(vacc1x0123); in xnn_f32_igemm_minmax_ukernel_4x8__neon_lane_ld64()
210 vst1_lane_f32(c1, vacc1x01, 0); in xnn_f32_igemm_minmax_ukernel_4x8__neon_lane_ld64()
/external/XNNPACK/src/f32-prelu/gen/
Dneon-2x4.c80 float32x2_t vacc1x01 = vget_low_f32(vacc1x0123); in xnn_f32_prelu_ukernel__neon_2x4() local
83 vst1_f32(o1, vacc1x01); o1 += 2; in xnn_f32_prelu_ukernel__neon_2x4()
86 vacc1x01 = vget_high_f32(vacc1x0123); in xnn_f32_prelu_ukernel__neon_2x4()
90 vst1_lane_f32(o1, vacc1x01, 0); o1 += 1; in xnn_f32_prelu_ukernel__neon_2x4()
Dneon-2x8.c110 float32x2_t vacc1x01 = vget_low_f32(vacc1x0123); in xnn_f32_prelu_ukernel__neon_2x8() local
113 vst1_f32(o1, vacc1x01); o1 += 2; in xnn_f32_prelu_ukernel__neon_2x8()
116 vacc1x01 = vget_high_f32(vacc1x0123); in xnn_f32_prelu_ukernel__neon_2x8()
120 vst1_lane_f32(o1, vacc1x01, 0); o1 += 1; in xnn_f32_prelu_ukernel__neon_2x8()
Dneon-4x4.c112 float32x2_t vacc1x01 = vget_low_f32(vacc1x0123); in xnn_f32_prelu_ukernel__neon_4x4() local
117 vst1_f32(o1, vacc1x01); o1 += 2; in xnn_f32_prelu_ukernel__neon_4x4()
122 vacc1x01 = vget_high_f32(vacc1x0123); in xnn_f32_prelu_ukernel__neon_4x4()
128 vst1_lane_f32(o1, vacc1x01, 0); o1 += 1; in xnn_f32_prelu_ukernel__neon_4x4()
Dneon-2x16.c132 float32x2_t vacc1x01 = vget_low_f32(vacc1x0123); in xnn_f32_prelu_ukernel__neon_2x16() local
135 vst1_f32(o1, vacc1x01); o1 += 2; in xnn_f32_prelu_ukernel__neon_2x16()
138 vacc1x01 = vget_high_f32(vacc1x0123); in xnn_f32_prelu_ukernel__neon_2x16()
142 vst1_lane_f32(o1, vacc1x01, 0); o1 += 1; in xnn_f32_prelu_ukernel__neon_2x16()
/external/XNNPACK/src/f32-vmulcaddc/gen/
Dc4-minmax-neonfma-2x.c89 float32x2_t vacc1x01 = vget_low_f32(vacc1x0123); in xnn_f32_vmulcaddc_minmax_ukernel_c4__neonfma_2x() local
92 vst1_f32(o1, vacc1x01); o1 += 2; in xnn_f32_vmulcaddc_minmax_ukernel_c4__neonfma_2x()
95 vacc1x01 = vget_high_f32(vacc1x0123); in xnn_f32_vmulcaddc_minmax_ukernel_c4__neonfma_2x()
99 vst1_lane_f32(o1, vacc1x01, 0); o1 += 1; in xnn_f32_vmulcaddc_minmax_ukernel_c4__neonfma_2x()
Dc4-minmax-neon-2x.c93 float32x2_t vacc1x01 = vget_low_f32(vacc1x0123); in xnn_f32_vmulcaddc_minmax_ukernel_c4__neon_2x() local
96 vst1_f32(o1, vacc1x01); o1 += 2; in xnn_f32_vmulcaddc_minmax_ukernel_c4__neon_2x()
99 vacc1x01 = vget_high_f32(vacc1x0123); in xnn_f32_vmulcaddc_minmax_ukernel_c4__neon_2x()
103 vst1_lane_f32(o1, vacc1x01, 0); o1 += 1; in xnn_f32_vmulcaddc_minmax_ukernel_c4__neon_2x()
Dc8-minmax-neonfma-2x.c122 float32x2_t vacc1x01 = vget_low_f32(vacc1x0123); in xnn_f32_vmulcaddc_minmax_ukernel_c8__neonfma_2x() local
125 vst1_f32(o1, vacc1x01); o1 += 2; in xnn_f32_vmulcaddc_minmax_ukernel_c8__neonfma_2x()
128 vacc1x01 = vget_high_f32(vacc1x0123); in xnn_f32_vmulcaddc_minmax_ukernel_c8__neonfma_2x()
132 vst1_lane_f32(o1, vacc1x01, 0); o1 += 1; in xnn_f32_vmulcaddc_minmax_ukernel_c8__neonfma_2x()
Dc8-minmax-neon-2x.c130 float32x2_t vacc1x01 = vget_low_f32(vacc1x0123); in xnn_f32_vmulcaddc_minmax_ukernel_c8__neon_2x() local
133 vst1_f32(o1, vacc1x01); o1 += 2; in xnn_f32_vmulcaddc_minmax_ukernel_c8__neon_2x()
136 vacc1x01 = vget_high_f32(vacc1x0123); in xnn_f32_vmulcaddc_minmax_ukernel_c8__neon_2x()
140 vst1_lane_f32(o1, vacc1x01, 0); o1 += 1; in xnn_f32_vmulcaddc_minmax_ukernel_c8__neon_2x()
/external/XNNPACK/src/f32-ppmm/gen/
D4x8-minmax-neon.c129 float32x2_t vacc1x01 = vget_low_f32(vacc1x0123); in xnn_f32_ppmm_minmax_ukernel_4x8__neon() local
134 vst1_f32(c1, vacc1x01); c1 += 2; in xnn_f32_ppmm_minmax_ukernel_4x8__neon()
139 vacc1x01 = vget_high_f32(vacc1x0123); in xnn_f32_ppmm_minmax_ukernel_4x8__neon()
145 vst1_lane_f32(c1, vacc1x01, 0); in xnn_f32_ppmm_minmax_ukernel_4x8__neon()
D4x8-minmax-neonfma.c145 float32x2_t vacc1x01 = vget_low_f32(vacc1x0123); in xnn_f32_ppmm_minmax_ukernel_4x8__neonfma() local
150 vst1_f32(c1, vacc1x01); c1 += 2; in xnn_f32_ppmm_minmax_ukernel_4x8__neonfma()
155 vacc1x01 = vget_high_f32(vacc1x0123); in xnn_f32_ppmm_minmax_ukernel_4x8__neonfma()
161 vst1_lane_f32(c1, vacc1x01, 0); in xnn_f32_ppmm_minmax_ukernel_4x8__neonfma()
/external/XNNPACK/src/qs8-gemm/gen/
D2x8c8-xw-minmax-avx2.c63 __m256i vacc1x01 = vacc0x01; in xnn_qs8_gemm_xw_minmax_ukernel_2x8c8__avx2() local
81 vacc1x01 = _mm256_add_epi32(vacc1x01, _mm256_madd_epi16(vxa1, vxb01)); in xnn_qs8_gemm_xw_minmax_ukernel_2x8c8__avx2()
101 const __m256i vacc1x0213 = _mm256_hadd_epi32(vacc1x01, vacc1x23); in xnn_qs8_gemm_xw_minmax_ukernel_2x8c8__avx2()
/external/XNNPACK/src/f32-gemm/gen-inc/
D4x8inc-minmax-neonfma-lane-ld64.c175 float32x2_t vacc1x01 = vget_low_f32(vacc1x0123); in xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_lane_ld64() local
180 vst1_f32(c1, vacc1x01); c1 += 2; in xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_lane_ld64()
185 vacc1x01 = vget_high_f32(vacc1x0123); in xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_lane_ld64()
191 vst1_lane_f32(c1, vacc1x01, 0); in xnn_f32_gemminc_minmax_ukernel_4x8__neonfma_lane_ld64()
D4x8inc-minmax-neon-dup-ld64.c183 float32x2_t vacc1x01 = vget_low_f32(vacc1x0123); in xnn_f32_gemminc_minmax_ukernel_4x8__neon_dup_ld64() local
188 vst1_f32(c1, vacc1x01); c1 += 2; in xnn_f32_gemminc_minmax_ukernel_4x8__neon_dup_ld64()
193 vacc1x01 = vget_high_f32(vacc1x0123); in xnn_f32_gemminc_minmax_ukernel_4x8__neon_dup_ld64()
199 vst1_lane_f32(c1, vacc1x01, 0); in xnn_f32_gemminc_minmax_ukernel_4x8__neon_dup_ld64()

123456