Searched refs:vb0123c2 (Results 1 – 25 of 188) sorted by relevance

/external/XNNPACK/src/qs8-gemm/gen/
4x8c2-minmax-neon-mull-padal-dup.c:84 const int8x8_t vb0123c2 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_4x8c2__neon_mull_padal_dup() local
91 …const int16x8_t vprod0x0123c2 = vmull_s8(vb0123c2, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_4x8c2__neon_mull_padal_dup()
107 …const int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_4x8c2__neon_mull_padal_dup()
123 …const int16x8_t vprod2x0123c2 = vmull_s8(vb0123c2, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_4x8c2__neon_mull_padal_dup()
139 …const int16x8_t vprod3x0123c2 = vmull_s8(vb0123c2, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_4x8c2__neon_mull_padal_dup()
205 … const int8x8_t vb0123c2 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_4x8c2__neon_mull_padal_dup() local
208 …const int16x8_t vprod0x0123c2 = vmull_s8(vb0123c2, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_4x8c2__neon_mull_padal_dup()
212 …const int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_4x8c2__neon_mull_padal_dup()
216 …const int16x8_t vprod2x0123c2 = vmull_s8(vb0123c2, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_4x8c2__neon_mull_padal_dup()
220 …const int16x8_t vprod3x0123c2 = vmull_s8(vb0123c2, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_4x8c2__neon_mull_padal_dup()
3x8c2-minmax-neon-mull-padal-dup.c:75 const int8x8_t vb0123c2 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_3x8c2__neon_mull_padal_dup() local
82 …const int16x8_t vprod0x0123c2 = vmull_s8(vb0123c2, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_3x8c2__neon_mull_padal_dup()
98 …const int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_3x8c2__neon_mull_padal_dup()
114 …const int16x8_t vprod2x0123c2 = vmull_s8(vb0123c2, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_3x8c2__neon_mull_padal_dup()
171 … const int8x8_t vb0123c2 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_3x8c2__neon_mull_padal_dup() local
174 …const int16x8_t vprod0x0123c2 = vmull_s8(vb0123c2, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_3x8c2__neon_mull_padal_dup()
178 …const int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_3x8c2__neon_mull_padal_dup()
182 …const int16x8_t vprod2x0123c2 = vmull_s8(vb0123c2, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_3x8c2__neon_mull_padal_dup()
2x8c2-minmax-neon-mull-padal-dup.c:66 const int8x8_t vb0123c2 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_2x8c2__neon_mull_padal_dup() local
73 …const int16x8_t vprod0x0123c2 = vmull_s8(vb0123c2, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_2x8c2__neon_mull_padal_dup()
89 …const int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_2x8c2__neon_mull_padal_dup()
137 … const int8x8_t vb0123c2 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_gemm_minmax_ukernel_2x8c2__neon_mull_padal_dup() local
140 …const int16x8_t vprod0x0123c2 = vmull_s8(vb0123c2, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_2x8c2__neon_mull_padal_dup()
144 …const int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_gemm_minmax_ukernel_2x8c2__neon_mull_padal_dup()
/external/XNNPACK/src/qs8-igemm/gen/
4x8c2-minmax-neon-mull-padal-dup.c:101 … const int8x8_t vb0123c2 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_4x8c2__neon_mull_padal_dup() local
108 …const int16x8_t vprod0x0123c2 = vmull_s8(vb0123c2, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_igemm_minmax_ukernel_4x8c2__neon_mull_padal_dup()
124 …const int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_igemm_minmax_ukernel_4x8c2__neon_mull_padal_dup()
140 …const int16x8_t vprod2x0123c2 = vmull_s8(vb0123c2, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_igemm_minmax_ukernel_4x8c2__neon_mull_padal_dup()
156 …const int16x8_t vprod3x0123c2 = vmull_s8(vb0123c2, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_igemm_minmax_ukernel_4x8c2__neon_mull_padal_dup()
222 … const int8x8_t vb0123c2 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_4x8c2__neon_mull_padal_dup() local
225 …const int16x8_t vprod0x0123c2 = vmull_s8(vb0123c2, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_igemm_minmax_ukernel_4x8c2__neon_mull_padal_dup()
229 …const int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_igemm_minmax_ukernel_4x8c2__neon_mull_padal_dup()
233 …const int16x8_t vprod2x0123c2 = vmull_s8(vb0123c2, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_igemm_minmax_ukernel_4x8c2__neon_mull_padal_dup()
237 …const int16x8_t vprod3x0123c2 = vmull_s8(vb0123c2, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_igemm_minmax_ukernel_4x8c2__neon_mull_padal_dup()
3x8c2-minmax-neon-mull-padal-dup.c:90 … const int8x8_t vb0123c2 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_3x8c2__neon_mull_padal_dup() local
97 …const int16x8_t vprod0x0123c2 = vmull_s8(vb0123c2, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_igemm_minmax_ukernel_3x8c2__neon_mull_padal_dup()
113 …const int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_igemm_minmax_ukernel_3x8c2__neon_mull_padal_dup()
129 …const int16x8_t vprod2x0123c2 = vmull_s8(vb0123c2, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_igemm_minmax_ukernel_3x8c2__neon_mull_padal_dup()
186 … const int8x8_t vb0123c2 = vld1_s8(w); w = (const void*) ((uintptr_t) w + 8 * sizeof(int8_t)); in xnn_qs8_igemm_minmax_ukernel_3x8c2__neon_mull_padal_dup() local
189 …const int16x8_t vprod0x0123c2 = vmull_s8(vb0123c2, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_igemm_minmax_ukernel_3x8c2__neon_mull_padal_dup()
193 …const int16x8_t vprod1x0123c2 = vmull_s8(vb0123c2, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_igemm_minmax_ukernel_3x8c2__neon_mull_padal_dup()
197 …const int16x8_t vprod2x0123c2 = vmull_s8(vb0123c2, vreinterpret_s8_s16(vdup_lane_s16(vreinterpret_… in xnn_qs8_igemm_minmax_ukernel_3x8c2__neon_mull_padal_dup()
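Note: the qs8-igemm hits use the same compute as the qs8-gemm ones; the difference is addressing. In an igemm (indirect GEMM) kernel the rows of A are fetched through an indirection buffer of pointers, which is how convolution is lowered without an explicit im2col. A scalar reference sketch of that addressing, with illustrative names and an assumed [ks*kc][nc] weight layout (not XNNPACK's API; zero points and requantization are omitted):

    #include <stddef.h>
    #include <stdint.h>

    // Scalar reference of indirect GEMM: C[m][n] = sum over ks gathered
    // A rows of length kc, times packed int8 weights.
    void igemm_ref(size_t mr, size_t nc, size_t kc, size_t ks,
                   const int8_t** indirection,  // mr * ks row pointers
                   const int8_t* w,             // packed [ks*kc][nc]
                   int32_t* c) {                // mr x nc accumulators
      for (size_t m = 0; m < mr; m++) {
        for (size_t n = 0; n < nc; n++) {
          int32_t acc = 0;
          for (size_t p = 0; p < ks; p++) {
            const int8_t* a = indirection[m * ks + p];  // gathered row
            for (size_t k = 0; k < kc; k++) {
              acc += (int32_t) a[k] * (int32_t) w[(p * kc + k) * nc + n];
            }
          }
          c[m * nc + n] = acc;
        }
      }
    }
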
/external/XNNPACK/src/f32-gemm/gen/
8x8s4-minmax-neonfma.c:172 const float32x4_t vb0123c2 = vld1q_f32(w + 16); in xnn_f32_gemm_minmax_ukernel_8x8s4__neonfma() local
175 vacc0x0123 = vfmaq_f32(vacc0x0123, va0, vb0123c2); in xnn_f32_gemm_minmax_ukernel_8x8s4__neonfma()
176 vacc1x0123 = vfmaq_f32(vacc1x0123, va1, vb0123c2); in xnn_f32_gemm_minmax_ukernel_8x8s4__neonfma()
177 vacc2x0123 = vfmaq_f32(vacc2x0123, va2, vb0123c2); in xnn_f32_gemm_minmax_ukernel_8x8s4__neonfma()
178 vacc3x0123 = vfmaq_f32(vacc3x0123, va3, vb0123c2); in xnn_f32_gemm_minmax_ukernel_8x8s4__neonfma()
179 vacc4x0123 = vfmaq_f32(vacc4x0123, va4, vb0123c2); in xnn_f32_gemm_minmax_ukernel_8x8s4__neonfma()
180 vacc5x0123 = vfmaq_f32(vacc5x0123, va5, vb0123c2); in xnn_f32_gemm_minmax_ukernel_8x8s4__neonfma()
181 vacc6x0123 = vfmaq_f32(vacc6x0123, va6, vb0123c2); in xnn_f32_gemm_minmax_ukernel_8x8s4__neonfma()
182 vacc7x0123 = vfmaq_f32(vacc7x0123, va7, vb0123c2); in xnn_f32_gemm_minmax_ukernel_8x8s4__neonfma()
8x8s4-minmax-neon.c:172 const float32x4_t vb0123c2 = vld1q_f32(w + 16); in xnn_f32_gemm_minmax_ukernel_8x8s4__neon() local
175 vacc0x0123 = vmlaq_f32(vacc0x0123, va0, vb0123c2); in xnn_f32_gemm_minmax_ukernel_8x8s4__neon()
176 vacc1x0123 = vmlaq_f32(vacc1x0123, va1, vb0123c2); in xnn_f32_gemm_minmax_ukernel_8x8s4__neon()
177 vacc2x0123 = vmlaq_f32(vacc2x0123, va2, vb0123c2); in xnn_f32_gemm_minmax_ukernel_8x8s4__neon()
178 vacc3x0123 = vmlaq_f32(vacc3x0123, va3, vb0123c2); in xnn_f32_gemm_minmax_ukernel_8x8s4__neon()
179 vacc4x0123 = vmlaq_f32(vacc4x0123, va4, vb0123c2); in xnn_f32_gemm_minmax_ukernel_8x8s4__neon()
180 vacc5x0123 = vmlaq_f32(vacc5x0123, va5, vb0123c2); in xnn_f32_gemm_minmax_ukernel_8x8s4__neon()
181 vacc6x0123 = vmlaq_f32(vacc6x0123, va6, vb0123c2); in xnn_f32_gemm_minmax_ukernel_8x8s4__neon()
182 vacc7x0123 = vmlaq_f32(vacc7x0123, va7, vb0123c2); in xnn_f32_gemm_minmax_ukernel_8x8s4__neon()
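Note: in the s4 variants the c2 weights sit 16 floats into the current panel (after the c0 and c1 steps, 8 floats each for columns 0-7), and A is not broadcast lane by lane: the whole A vector is multiplied and then rotated one lane between steps. A minimal sketch under those assumptions, with illustrative names:

    #include <arm_neon.h>

    // One "s4" step for row 0, columns 0-3 (illustrative sketch).
    static inline void f32_s4_c2_step(float32x4_t* va0,
                                      float32x4_t* vacc0x0123,
                                      const float* w) {
      const float32x4_t vb0123c2 = vld1q_f32(w + 16);  // c2 panel offset
      *vacc0x0123 = vfmaq_f32(*vacc0x0123, *va0, vb0123c2);  // neonfma path
      // The plain neon path uses vmlaq_f32 (separate multiply + add) instead.
      *va0 = vextq_f32(*va0, *va0, 1);  // rotate A one lane for the c3 step
    }

In the generated kernels the rotate happens once per step for all rows after the per-row FMAs; the neonfma/neon file pairs differ only in vfmaq_f32 versus vmlaq_f32.
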
6x8-minmax-neonfma-lane-ld128.c:128 const float32x4_t vb0123c2 = vld1q_f32(w); w += 4; in xnn_f32_gemm_minmax_ukernel_6x8__neonfma_lane_ld128() local
131 vacc0x0123 = vfmaq_lane_f32(vacc0x0123, vb0123c2, vget_high_f32(va0), 0); in xnn_f32_gemm_minmax_ukernel_6x8__neonfma_lane_ld128()
132 vacc1x0123 = vfmaq_lane_f32(vacc1x0123, vb0123c2, vget_high_f32(va1), 0); in xnn_f32_gemm_minmax_ukernel_6x8__neonfma_lane_ld128()
133 vacc2x0123 = vfmaq_lane_f32(vacc2x0123, vb0123c2, vget_high_f32(va2), 0); in xnn_f32_gemm_minmax_ukernel_6x8__neonfma_lane_ld128()
134 vacc3x0123 = vfmaq_lane_f32(vacc3x0123, vb0123c2, vget_high_f32(va3), 0); in xnn_f32_gemm_minmax_ukernel_6x8__neonfma_lane_ld128()
135 vacc4x0123 = vfmaq_lane_f32(vacc4x0123, vb0123c2, vget_high_f32(va4), 0); in xnn_f32_gemm_minmax_ukernel_6x8__neonfma_lane_ld128()
136 vacc5x0123 = vfmaq_lane_f32(vacc5x0123, vb0123c2, vget_high_f32(va5), 0); in xnn_f32_gemm_minmax_ukernel_6x8__neonfma_lane_ld128()
6x8s4-minmax-neon.c:142 const float32x4_t vb0123c2 = vld1q_f32(w + 16); in xnn_f32_gemm_minmax_ukernel_6x8s4__neon() local
145 vacc0x0123 = vmlaq_f32(vacc0x0123, va0, vb0123c2); in xnn_f32_gemm_minmax_ukernel_6x8s4__neon()
146 vacc1x0123 = vmlaq_f32(vacc1x0123, va1, vb0123c2); in xnn_f32_gemm_minmax_ukernel_6x8s4__neon()
147 vacc2x0123 = vmlaq_f32(vacc2x0123, va2, vb0123c2); in xnn_f32_gemm_minmax_ukernel_6x8s4__neon()
148 vacc3x0123 = vmlaq_f32(vacc3x0123, va3, vb0123c2); in xnn_f32_gemm_minmax_ukernel_6x8s4__neon()
149 vacc4x0123 = vmlaq_f32(vacc4x0123, va4, vb0123c2); in xnn_f32_gemm_minmax_ukernel_6x8s4__neon()
150 vacc5x0123 = vmlaq_f32(vacc5x0123, va5, vb0123c2); in xnn_f32_gemm_minmax_ukernel_6x8s4__neon()
6x8s4-minmax-neonfma.c:142 const float32x4_t vb0123c2 = vld1q_f32(w + 16); in xnn_f32_gemm_minmax_ukernel_6x8s4__neonfma() local
145 vacc0x0123 = vfmaq_f32(vacc0x0123, va0, vb0123c2); in xnn_f32_gemm_minmax_ukernel_6x8s4__neonfma()
146 vacc1x0123 = vfmaq_f32(vacc1x0123, va1, vb0123c2); in xnn_f32_gemm_minmax_ukernel_6x8s4__neonfma()
147 vacc2x0123 = vfmaq_f32(vacc2x0123, va2, vb0123c2); in xnn_f32_gemm_minmax_ukernel_6x8s4__neonfma()
148 vacc3x0123 = vfmaq_f32(vacc3x0123, va3, vb0123c2); in xnn_f32_gemm_minmax_ukernel_6x8s4__neonfma()
149 vacc4x0123 = vfmaq_f32(vacc4x0123, va4, vb0123c2); in xnn_f32_gemm_minmax_ukernel_6x8s4__neonfma()
150 vacc5x0123 = vfmaq_f32(vacc5x0123, va5, vb0123c2); in xnn_f32_gemm_minmax_ukernel_6x8s4__neonfma()
6x8-minmax-neon-lane-ld128.c:128 const float32x4_t vb0123c2 = vld1q_f32(w); w += 4; in xnn_f32_gemm_minmax_ukernel_6x8__neon_lane_ld128() local
131 vacc0x0123 = vmlaq_lane_f32(vacc0x0123, vb0123c2, vget_high_f32(va0), 0); in xnn_f32_gemm_minmax_ukernel_6x8__neon_lane_ld128()
132 vacc1x0123 = vmlaq_lane_f32(vacc1x0123, vb0123c2, vget_high_f32(va1), 0); in xnn_f32_gemm_minmax_ukernel_6x8__neon_lane_ld128()
133 vacc2x0123 = vmlaq_lane_f32(vacc2x0123, vb0123c2, vget_high_f32(va2), 0); in xnn_f32_gemm_minmax_ukernel_6x8__neon_lane_ld128()
134 vacc3x0123 = vmlaq_lane_f32(vacc3x0123, vb0123c2, vget_high_f32(va3), 0); in xnn_f32_gemm_minmax_ukernel_6x8__neon_lane_ld128()
135 vacc4x0123 = vmlaq_lane_f32(vacc4x0123, vb0123c2, vget_high_f32(va4), 0); in xnn_f32_gemm_minmax_ukernel_6x8__neon_lane_ld128()
136 vacc5x0123 = vmlaq_lane_f32(vacc5x0123, vb0123c2, vget_high_f32(va5), 0); in xnn_f32_gemm_minmax_ukernel_6x8__neon_lane_ld128()
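Note: the lane-ld128 variants load four A elements at once (one 128-bit load) and scale each B vector by a single scalar lane; for c2 that lane is element 0 of the high half of va, i.e. a[k = 2]. A minimal sketch with illustrative names:

    #include <arm_neon.h>

    // One "lane" c2 step for row 0, columns 0-3 (illustrative sketch).
    static inline float32x4_t f32_lane_c2_step(float32x4_t vacc0x0123,
                                               float32x4_t vb0123c2,
                                               float32x4_t va0) {
      // vget_high_f32(va0) = {a[2], a[3]}; lane 0 selects a[2], the k = 2 element.
      return vfmaq_lane_f32(vacc0x0123, vb0123c2, vget_high_f32(va0), 0);
    }

As with s4, the paired neon files replace vfmaq_lane_f32 with vmlaq_lane_f32.
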
4x8-minmax-neonfma-lane-ld128.c:102 const float32x4_t vb0123c2 = vld1q_f32(w); w += 4; in xnn_f32_gemm_minmax_ukernel_4x8__neonfma_lane_ld128() local
105 vacc0x0123 = vfmaq_lane_f32(vacc0x0123, vb0123c2, vget_high_f32(va0), 0); in xnn_f32_gemm_minmax_ukernel_4x8__neonfma_lane_ld128()
106 vacc1x0123 = vfmaq_lane_f32(vacc1x0123, vb0123c2, vget_high_f32(va1), 0); in xnn_f32_gemm_minmax_ukernel_4x8__neonfma_lane_ld128()
107 vacc2x0123 = vfmaq_lane_f32(vacc2x0123, vb0123c2, vget_high_f32(va2), 0); in xnn_f32_gemm_minmax_ukernel_4x8__neonfma_lane_ld128()
108 vacc3x0123 = vfmaq_lane_f32(vacc3x0123, vb0123c2, vget_high_f32(va3), 0); in xnn_f32_gemm_minmax_ukernel_4x8__neonfma_lane_ld128()
4x8-minmax-neon-lane-ld128.c:102 const float32x4_t vb0123c2 = vld1q_f32(w); w += 4; in xnn_f32_gemm_minmax_ukernel_4x8__neon_lane_ld128() local
105 vacc0x0123 = vmlaq_lane_f32(vacc0x0123, vb0123c2, vget_high_f32(va0), 0); in xnn_f32_gemm_minmax_ukernel_4x8__neon_lane_ld128()
106 vacc1x0123 = vmlaq_lane_f32(vacc1x0123, vb0123c2, vget_high_f32(va1), 0); in xnn_f32_gemm_minmax_ukernel_4x8__neon_lane_ld128()
107 vacc2x0123 = vmlaq_lane_f32(vacc2x0123, vb0123c2, vget_high_f32(va2), 0); in xnn_f32_gemm_minmax_ukernel_4x8__neon_lane_ld128()
108 vacc3x0123 = vmlaq_lane_f32(vacc3x0123, vb0123c2, vget_high_f32(va3), 0); in xnn_f32_gemm_minmax_ukernel_4x8__neon_lane_ld128()
4x8-wasmsimd-splat.c:118 const v128_t vb0123c2 = wasm_v128_load(w + 16); in xnn_f32_gemm_ukernel_4x8__wasmsimd_splat() local
121 vacc0x0123 = wasm_f32x4_add(vacc0x0123, wasm_f32x4_mul(va0c2, vb0123c2)); in xnn_f32_gemm_ukernel_4x8__wasmsimd_splat()
122 vacc1x0123 = wasm_f32x4_add(vacc1x0123, wasm_f32x4_mul(va1c2, vb0123c2)); in xnn_f32_gemm_ukernel_4x8__wasmsimd_splat()
123 vacc2x0123 = wasm_f32x4_add(vacc2x0123, wasm_f32x4_mul(va2c2, vb0123c2)); in xnn_f32_gemm_ukernel_4x8__wasmsimd_splat()
124 vacc3x0123 = wasm_f32x4_add(vacc3x0123, wasm_f32x4_mul(va3c2, vb0123c2)); in xnn_f32_gemm_ukernel_4x8__wasmsimd_splat()
5x8-relu-wasmsimd-splat.c:135 const v128_t vb0123c2 = wasm_v128_load(w + 16); in xnn_f32_gemm_relu_ukernel_5x8__wasmsimd_splat() local
138 vacc0x0123 = wasm_f32x4_add(vacc0x0123, wasm_f32x4_mul(va0c2, vb0123c2)); in xnn_f32_gemm_relu_ukernel_5x8__wasmsimd_splat()
139 vacc1x0123 = wasm_f32x4_add(vacc1x0123, wasm_f32x4_mul(va1c2, vb0123c2)); in xnn_f32_gemm_relu_ukernel_5x8__wasmsimd_splat()
140 vacc2x0123 = wasm_f32x4_add(vacc2x0123, wasm_f32x4_mul(va2c2, vb0123c2)); in xnn_f32_gemm_relu_ukernel_5x8__wasmsimd_splat()
141 vacc3x0123 = wasm_f32x4_add(vacc3x0123, wasm_f32x4_mul(va3c2, vb0123c2)); in xnn_f32_gemm_relu_ukernel_5x8__wasmsimd_splat()
142 vacc4x0123 = wasm_f32x4_add(vacc4x0123, wasm_f32x4_mul(va4c2, vb0123c2)); in xnn_f32_gemm_relu_ukernel_5x8__wasmsimd_splat()
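Note: the wasmsimd-splat hits are the same c2 step without fused multiply-add: the k = 2 element of A is splatted across a v128 with a shuffle, then multiplied and added in two separate ops. A minimal sketch; the va0c2 splat shown here is an assumption about how the truncated hit lines produce it:

    #include <wasm_simd128.h>

    // One wasmsimd "splat" c2 step for row 0, columns 0-3 (illustrative sketch).
    static inline v128_t f32_wasm_c2_step(v128_t vacc0x0123, v128_t va0,
                                          const float* w) {
      const v128_t vb0123c2 = wasm_v128_load(w + 16);
      const v128_t va0c2 = wasm_i32x4_shuffle(va0, va0, 2, 2, 2, 2);  // splat a[2]
      return wasm_f32x4_add(vacc0x0123, wasm_f32x4_mul(va0c2, vb0123c2));
    }
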
/external/XNNPACK/src/f32-gemm/gen-inc/
8x8s4inc-minmax-neon.c:174 const float32x4_t vb0123c2 = vld1q_f32(w + 16); in xnn_f32_gemminc_minmax_ukernel_8x8s4__neon() local
177 vacc0x0123 = vmlaq_f32(vacc0x0123, va0, vb0123c2); in xnn_f32_gemminc_minmax_ukernel_8x8s4__neon()
178 vacc1x0123 = vmlaq_f32(vacc1x0123, va1, vb0123c2); in xnn_f32_gemminc_minmax_ukernel_8x8s4__neon()
179 vacc2x0123 = vmlaq_f32(vacc2x0123, va2, vb0123c2); in xnn_f32_gemminc_minmax_ukernel_8x8s4__neon()
180 vacc3x0123 = vmlaq_f32(vacc3x0123, va3, vb0123c2); in xnn_f32_gemminc_minmax_ukernel_8x8s4__neon()
181 vacc4x0123 = vmlaq_f32(vacc4x0123, va4, vb0123c2); in xnn_f32_gemminc_minmax_ukernel_8x8s4__neon()
182 vacc5x0123 = vmlaq_f32(vacc5x0123, va5, vb0123c2); in xnn_f32_gemminc_minmax_ukernel_8x8s4__neon()
183 vacc6x0123 = vmlaq_f32(vacc6x0123, va6, vb0123c2); in xnn_f32_gemminc_minmax_ukernel_8x8s4__neon()
184 vacc7x0123 = vmlaq_f32(vacc7x0123, va7, vb0123c2); in xnn_f32_gemminc_minmax_ukernel_8x8s4__neon()
8x8s4inc-minmax-neonfma.c:174 const float32x4_t vb0123c2 = vld1q_f32(w + 16); in xnn_f32_gemminc_minmax_ukernel_8x8s4__neonfma() local
177 vacc0x0123 = vfmaq_f32(vacc0x0123, va0, vb0123c2); in xnn_f32_gemminc_minmax_ukernel_8x8s4__neonfma()
178 vacc1x0123 = vfmaq_f32(vacc1x0123, va1, vb0123c2); in xnn_f32_gemminc_minmax_ukernel_8x8s4__neonfma()
179 vacc2x0123 = vfmaq_f32(vacc2x0123, va2, vb0123c2); in xnn_f32_gemminc_minmax_ukernel_8x8s4__neonfma()
180 vacc3x0123 = vfmaq_f32(vacc3x0123, va3, vb0123c2); in xnn_f32_gemminc_minmax_ukernel_8x8s4__neonfma()
181 vacc4x0123 = vfmaq_f32(vacc4x0123, va4, vb0123c2); in xnn_f32_gemminc_minmax_ukernel_8x8s4__neonfma()
182 vacc5x0123 = vfmaq_f32(vacc5x0123, va5, vb0123c2); in xnn_f32_gemminc_minmax_ukernel_8x8s4__neonfma()
183 vacc6x0123 = vfmaq_f32(vacc6x0123, va6, vb0123c2); in xnn_f32_gemminc_minmax_ukernel_8x8s4__neonfma()
184 vacc7x0123 = vfmaq_f32(vacc7x0123, va7, vb0123c2); in xnn_f32_gemminc_minmax_ukernel_8x8s4__neonfma()
6x8inc-minmax-neon-lane-ld128.c:130 const float32x4_t vb0123c2 = vld1q_f32(w); w += 4; in xnn_f32_gemminc_minmax_ukernel_6x8__neon_lane_ld128() local
133 vacc0x0123 = vmlaq_lane_f32(vacc0x0123, vb0123c2, vget_high_f32(va0), 0); in xnn_f32_gemminc_minmax_ukernel_6x8__neon_lane_ld128()
134 vacc1x0123 = vmlaq_lane_f32(vacc1x0123, vb0123c2, vget_high_f32(va1), 0); in xnn_f32_gemminc_minmax_ukernel_6x8__neon_lane_ld128()
135 vacc2x0123 = vmlaq_lane_f32(vacc2x0123, vb0123c2, vget_high_f32(va2), 0); in xnn_f32_gemminc_minmax_ukernel_6x8__neon_lane_ld128()
136 vacc3x0123 = vmlaq_lane_f32(vacc3x0123, vb0123c2, vget_high_f32(va3), 0); in xnn_f32_gemminc_minmax_ukernel_6x8__neon_lane_ld128()
137 vacc4x0123 = vmlaq_lane_f32(vacc4x0123, vb0123c2, vget_high_f32(va4), 0); in xnn_f32_gemminc_minmax_ukernel_6x8__neon_lane_ld128()
138 vacc5x0123 = vmlaq_lane_f32(vacc5x0123, vb0123c2, vget_high_f32(va5), 0); in xnn_f32_gemminc_minmax_ukernel_6x8__neon_lane_ld128()
6x8s4inc-minmax-neon.c:144 const float32x4_t vb0123c2 = vld1q_f32(w + 16); in xnn_f32_gemminc_minmax_ukernel_6x8s4__neon() local
147 vacc0x0123 = vmlaq_f32(vacc0x0123, va0, vb0123c2); in xnn_f32_gemminc_minmax_ukernel_6x8s4__neon()
148 vacc1x0123 = vmlaq_f32(vacc1x0123, va1, vb0123c2); in xnn_f32_gemminc_minmax_ukernel_6x8s4__neon()
149 vacc2x0123 = vmlaq_f32(vacc2x0123, va2, vb0123c2); in xnn_f32_gemminc_minmax_ukernel_6x8s4__neon()
150 vacc3x0123 = vmlaq_f32(vacc3x0123, va3, vb0123c2); in xnn_f32_gemminc_minmax_ukernel_6x8s4__neon()
151 vacc4x0123 = vmlaq_f32(vacc4x0123, va4, vb0123c2); in xnn_f32_gemminc_minmax_ukernel_6x8s4__neon()
152 vacc5x0123 = vmlaq_f32(vacc5x0123, va5, vb0123c2); in xnn_f32_gemminc_minmax_ukernel_6x8s4__neon()
6x8s4inc-minmax-neonfma.c:144 const float32x4_t vb0123c2 = vld1q_f32(w + 16); in xnn_f32_gemminc_minmax_ukernel_6x8s4__neonfma() local
147 vacc0x0123 = vfmaq_f32(vacc0x0123, va0, vb0123c2); in xnn_f32_gemminc_minmax_ukernel_6x8s4__neonfma()
148 vacc1x0123 = vfmaq_f32(vacc1x0123, va1, vb0123c2); in xnn_f32_gemminc_minmax_ukernel_6x8s4__neonfma()
149 vacc2x0123 = vfmaq_f32(vacc2x0123, va2, vb0123c2); in xnn_f32_gemminc_minmax_ukernel_6x8s4__neonfma()
150 vacc3x0123 = vfmaq_f32(vacc3x0123, va3, vb0123c2); in xnn_f32_gemminc_minmax_ukernel_6x8s4__neonfma()
151 vacc4x0123 = vfmaq_f32(vacc4x0123, va4, vb0123c2); in xnn_f32_gemminc_minmax_ukernel_6x8s4__neonfma()
152 vacc5x0123 = vfmaq_f32(vacc5x0123, va5, vb0123c2); in xnn_f32_gemminc_minmax_ukernel_6x8s4__neonfma()
6x8inc-minmax-neonfma-lane-ld128.c:130 const float32x4_t vb0123c2 = vld1q_f32(w); w += 4; in xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_lane_ld128() local
133 vacc0x0123 = vfmaq_lane_f32(vacc0x0123, vb0123c2, vget_high_f32(va0), 0); in xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_lane_ld128()
134 vacc1x0123 = vfmaq_lane_f32(vacc1x0123, vb0123c2, vget_high_f32(va1), 0); in xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_lane_ld128()
135 vacc2x0123 = vfmaq_lane_f32(vacc2x0123, vb0123c2, vget_high_f32(va2), 0); in xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_lane_ld128()
136 vacc3x0123 = vfmaq_lane_f32(vacc3x0123, vb0123c2, vget_high_f32(va3), 0); in xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_lane_ld128()
137 vacc4x0123 = vfmaq_lane_f32(vacc4x0123, vb0123c2, vget_high_f32(va4), 0); in xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_lane_ld128()
138 vacc5x0123 = vfmaq_lane_f32(vacc5x0123, vb0123c2, vget_high_f32(va5), 0); in xnn_f32_gemminc_minmax_ukernel_6x8__neonfma_lane_ld128()
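Note: the gen-inc hits mirror the f32-gemm ones line for line; the gemminc kernels differ only in how accumulators are initialized, which lets a long K dimension be split across calls. To my understanding the gemm kernels seed accumulators from the packed bias at the head of w, while gemminc seeds them from a caller-provided partial-sum buffer; a sketch of just that difference, with hypothetical helper names:

    #include <arm_neon.h>

    // gemm: accumulators start from the packed bias at the head of w.
    static inline float32x4_t init_acc_gemm(const float* w) {
      return vld1q_f32(w);
    }

    // gemminc: accumulators resume from the partial sums of a previous K-slice.
    static inline float32x4_t init_acc_gemminc(const float* acc) {
      return vld1q_f32(acc);
    }
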
/external/XNNPACK/src/f32-igemm/gen/
8x8s4-minmax-neonfma.c:205 const float32x4_t vb0123c2 = vld1q_f32(w + 16); in xnn_f32_igemm_minmax_ukernel_8x8s4__neonfma() local
208 vacc0x0123 = vfmaq_f32(vacc0x0123, va0, vb0123c2); in xnn_f32_igemm_minmax_ukernel_8x8s4__neonfma()
209 vacc1x0123 = vfmaq_f32(vacc1x0123, va1, vb0123c2); in xnn_f32_igemm_minmax_ukernel_8x8s4__neonfma()
210 vacc2x0123 = vfmaq_f32(vacc2x0123, va2, vb0123c2); in xnn_f32_igemm_minmax_ukernel_8x8s4__neonfma()
211 vacc3x0123 = vfmaq_f32(vacc3x0123, va3, vb0123c2); in xnn_f32_igemm_minmax_ukernel_8x8s4__neonfma()
212 vacc4x0123 = vfmaq_f32(vacc4x0123, va4, vb0123c2); in xnn_f32_igemm_minmax_ukernel_8x8s4__neonfma()
213 vacc5x0123 = vfmaq_f32(vacc5x0123, va5, vb0123c2); in xnn_f32_igemm_minmax_ukernel_8x8s4__neonfma()
214 vacc6x0123 = vfmaq_f32(vacc6x0123, va6, vb0123c2); in xnn_f32_igemm_minmax_ukernel_8x8s4__neonfma()
215 vacc7x0123 = vfmaq_f32(vacc7x0123, va7, vb0123c2); in xnn_f32_igemm_minmax_ukernel_8x8s4__neonfma()
8x8s4-minmax-neon.c:205 const float32x4_t vb0123c2 = vld1q_f32(w + 16); in xnn_f32_igemm_minmax_ukernel_8x8s4__neon() local
208 vacc0x0123 = vmlaq_f32(vacc0x0123, va0, vb0123c2); in xnn_f32_igemm_minmax_ukernel_8x8s4__neon()
209 vacc1x0123 = vmlaq_f32(vacc1x0123, va1, vb0123c2); in xnn_f32_igemm_minmax_ukernel_8x8s4__neon()
210 vacc2x0123 = vmlaq_f32(vacc2x0123, va2, vb0123c2); in xnn_f32_igemm_minmax_ukernel_8x8s4__neon()
211 vacc3x0123 = vmlaq_f32(vacc3x0123, va3, vb0123c2); in xnn_f32_igemm_minmax_ukernel_8x8s4__neon()
212 vacc4x0123 = vmlaq_f32(vacc4x0123, va4, vb0123c2); in xnn_f32_igemm_minmax_ukernel_8x8s4__neon()
213 vacc5x0123 = vmlaq_f32(vacc5x0123, va5, vb0123c2); in xnn_f32_igemm_minmax_ukernel_8x8s4__neon()
214 vacc6x0123 = vmlaq_f32(vacc6x0123, va6, vb0123c2); in xnn_f32_igemm_minmax_ukernel_8x8s4__neon()
215 vacc7x0123 = vmlaq_f32(vacc7x0123, va7, vb0123c2); in xnn_f32_igemm_minmax_ukernel_8x8s4__neon()
6x8-minmax-neonfma-lane-ld128.c:156 const float32x4_t vb0123c2 = vld1q_f32(w); w += 4; in xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld128() local
159 vacc0x0123 = vfmaq_lane_f32(vacc0x0123, vb0123c2, vget_high_f32(va0), 0); in xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld128()
160 vacc1x0123 = vfmaq_lane_f32(vacc1x0123, vb0123c2, vget_high_f32(va1), 0); in xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld128()
161 vacc2x0123 = vfmaq_lane_f32(vacc2x0123, vb0123c2, vget_high_f32(va2), 0); in xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld128()
162 vacc3x0123 = vfmaq_lane_f32(vacc3x0123, vb0123c2, vget_high_f32(va3), 0); in xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld128()
163 vacc4x0123 = vfmaq_lane_f32(vacc4x0123, vb0123c2, vget_high_f32(va4), 0); in xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld128()
164 vacc5x0123 = vfmaq_lane_f32(vacc5x0123, vb0123c2, vget_high_f32(va5), 0); in xnn_f32_igemm_minmax_ukernel_6x8__neonfma_lane_ld128()
6x8-minmax-neon-lane-ld128.c:156 const float32x4_t vb0123c2 = vld1q_f32(w); w += 4; in xnn_f32_igemm_minmax_ukernel_6x8__neon_lane_ld128() local
159 vacc0x0123 = vmlaq_lane_f32(vacc0x0123, vb0123c2, vget_high_f32(va0), 0); in xnn_f32_igemm_minmax_ukernel_6x8__neon_lane_ld128()
160 vacc1x0123 = vmlaq_lane_f32(vacc1x0123, vb0123c2, vget_high_f32(va1), 0); in xnn_f32_igemm_minmax_ukernel_6x8__neon_lane_ld128()
161 vacc2x0123 = vmlaq_lane_f32(vacc2x0123, vb0123c2, vget_high_f32(va2), 0); in xnn_f32_igemm_minmax_ukernel_6x8__neon_lane_ld128()
162 vacc3x0123 = vmlaq_lane_f32(vacc3x0123, vb0123c2, vget_high_f32(va3), 0); in xnn_f32_igemm_minmax_ukernel_6x8__neon_lane_ld128()
163 vacc4x0123 = vmlaq_lane_f32(vacc4x0123, vb0123c2, vget_high_f32(va4), 0); in xnn_f32_igemm_minmax_ukernel_6x8__neon_lane_ld128()
164 vacc5x0123 = vmlaq_lane_f32(vacc5x0123, vb0123c2, vget_high_f32(va5), 0); in xnn_f32_igemm_minmax_ukernel_6x8__neon_lane_ld128()
