Home
last modified time | relevance | path

Searched refs:vb01234567c3 (Results 1 – 17 of 17) sorted by relevance

/external/XNNPACK/src/f16-gemm/gen/
D8x8-neonfp16arith-ld64.c195 …const float16x8_t vb01234567c3 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof(float16x8… in xnn_f16_gemm_ukernel_8x8__neonfp16arith_ld64() local
198 vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c3, va0, 3); in xnn_f16_gemm_ukernel_8x8__neonfp16arith_ld64()
199 vacc1x01234567 = vfmaq_lane_f16(vacc1x01234567, vb01234567c3, va1, 3); in xnn_f16_gemm_ukernel_8x8__neonfp16arith_ld64()
200 vacc2x01234567 = vfmaq_lane_f16(vacc2x01234567, vb01234567c3, va2, 3); in xnn_f16_gemm_ukernel_8x8__neonfp16arith_ld64()
201 vacc3x01234567 = vfmaq_lane_f16(vacc3x01234567, vb01234567c3, va3, 3); in xnn_f16_gemm_ukernel_8x8__neonfp16arith_ld64()
202 vacc4x01234567 = vfmaq_lane_f16(vacc4x01234567, vb01234567c3, va4, 3); in xnn_f16_gemm_ukernel_8x8__neonfp16arith_ld64()
203 vacc5x01234567 = vfmaq_lane_f16(vacc5x01234567, vb01234567c3, va5, 3); in xnn_f16_gemm_ukernel_8x8__neonfp16arith_ld64()
204 vacc6x01234567 = vfmaq_lane_f16(vacc6x01234567, vb01234567c3, va6, 3); in xnn_f16_gemm_ukernel_8x8__neonfp16arith_ld64()
205 vacc7x01234567 = vfmaq_lane_f16(vacc7x01234567, vb01234567c3, va7, 3); in xnn_f16_gemm_ukernel_8x8__neonfp16arith_ld64()
216 vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c3, vb01234567c3); in xnn_f16_gemm_ukernel_8x8__neonfp16arith_ld64()
[all …]
D6x8-neonfp16arith-ld64.c161 …const float16x8_t vb01234567c3 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof(float16x8… in xnn_f16_gemm_ukernel_6x8__neonfp16arith_ld64() local
164 vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c3, va0, 3); in xnn_f16_gemm_ukernel_6x8__neonfp16arith_ld64()
165 vacc1x01234567 = vfmaq_lane_f16(vacc1x01234567, vb01234567c3, va1, 3); in xnn_f16_gemm_ukernel_6x8__neonfp16arith_ld64()
166 vacc2x01234567 = vfmaq_lane_f16(vacc2x01234567, vb01234567c3, va2, 3); in xnn_f16_gemm_ukernel_6x8__neonfp16arith_ld64()
167 vacc3x01234567 = vfmaq_lane_f16(vacc3x01234567, vb01234567c3, va3, 3); in xnn_f16_gemm_ukernel_6x8__neonfp16arith_ld64()
168 vacc4x01234567 = vfmaq_lane_f16(vacc4x01234567, vb01234567c3, va4, 3); in xnn_f16_gemm_ukernel_6x8__neonfp16arith_ld64()
169 vacc5x01234567 = vfmaq_lane_f16(vacc5x01234567, vb01234567c3, va5, 3); in xnn_f16_gemm_ukernel_6x8__neonfp16arith_ld64()
178 vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c3, vb01234567c3); in xnn_f16_gemm_ukernel_6x8__neonfp16arith_ld64()
179 vacc1x01234567 = vfmaq_f16(vacc1x01234567, va1c3, vb01234567c3); in xnn_f16_gemm_ukernel_6x8__neonfp16arith_ld64()
180 vacc2x01234567 = vfmaq_f16(vacc2x01234567, va2c3, vb01234567c3); in xnn_f16_gemm_ukernel_6x8__neonfp16arith_ld64()
[all …]
D4x8-neonfp16arith-ld64.c127 …const float16x8_t vb01234567c3 = vld1q_f16(w); w = (const void*) ((uintptr_t) w + sizeof(float16x8… in xnn_f16_gemm_ukernel_4x8__neonfp16arith_ld64() local
130 vacc0x01234567 = vfmaq_lane_f16(vacc0x01234567, vb01234567c3, va0, 3); in xnn_f16_gemm_ukernel_4x8__neonfp16arith_ld64()
131 vacc1x01234567 = vfmaq_lane_f16(vacc1x01234567, vb01234567c3, va1, 3); in xnn_f16_gemm_ukernel_4x8__neonfp16arith_ld64()
132 vacc2x01234567 = vfmaq_lane_f16(vacc2x01234567, vb01234567c3, va2, 3); in xnn_f16_gemm_ukernel_4x8__neonfp16arith_ld64()
133 vacc3x01234567 = vfmaq_lane_f16(vacc3x01234567, vb01234567c3, va3, 3); in xnn_f16_gemm_ukernel_4x8__neonfp16arith_ld64()
140 vacc0x01234567 = vfmaq_f16(vacc0x01234567, va0c3, vb01234567c3); in xnn_f16_gemm_ukernel_4x8__neonfp16arith_ld64()
141 vacc1x01234567 = vfmaq_f16(vacc1x01234567, va1c3, vb01234567c3); in xnn_f16_gemm_ukernel_4x8__neonfp16arith_ld64()
142 vacc2x01234567 = vfmaq_f16(vacc2x01234567, va2c3, vb01234567c3); in xnn_f16_gemm_ukernel_4x8__neonfp16arith_ld64()
143 vacc3x01234567 = vfmaq_f16(vacc3x01234567, va3c3, vb01234567c3); in xnn_f16_gemm_ukernel_4x8__neonfp16arith_ld64()
/external/XNNPACK/src/f32-gemm/gen-inc/
D4x16s4-fma3-broadcast.c135 const __m256 vb01234567c3 = _mm256_load_ps(w + 48); in xnn_f32_gemminc_ukernel_4x16s4__fma3_broadcast() local
138 vacc0x01234567 = _mm256_fmadd_ps(va0, vb01234567c3, vacc0x01234567); in xnn_f32_gemminc_ukernel_4x16s4__fma3_broadcast()
139 vacc1x01234567 = _mm256_fmadd_ps(va1, vb01234567c3, vacc1x01234567); in xnn_f32_gemminc_ukernel_4x16s4__fma3_broadcast()
140 vacc2x01234567 = _mm256_fmadd_ps(va2, vb01234567c3, vacc2x01234567); in xnn_f32_gemminc_ukernel_4x16s4__fma3_broadcast()
141 vacc3x01234567 = _mm256_fmadd_ps(va3, vb01234567c3, vacc3x01234567); in xnn_f32_gemminc_ukernel_4x16s4__fma3_broadcast()
D5x16s4-fma3-broadcast.c154 const __m256 vb01234567c3 = _mm256_load_ps(w + 48); in xnn_f32_gemminc_ukernel_5x16s4__fma3_broadcast() local
157 vacc0x01234567 = _mm256_fmadd_ps(va0, vb01234567c3, vacc0x01234567); in xnn_f32_gemminc_ukernel_5x16s4__fma3_broadcast()
158 vacc1x01234567 = _mm256_fmadd_ps(va1, vb01234567c3, vacc1x01234567); in xnn_f32_gemminc_ukernel_5x16s4__fma3_broadcast()
159 vacc2x01234567 = _mm256_fmadd_ps(va2, vb01234567c3, vacc2x01234567); in xnn_f32_gemminc_ukernel_5x16s4__fma3_broadcast()
160 vacc3x01234567 = _mm256_fmadd_ps(va3, vb01234567c3, vacc3x01234567); in xnn_f32_gemminc_ukernel_5x16s4__fma3_broadcast()
161 vacc4x01234567 = _mm256_fmadd_ps(va4, vb01234567c3, vacc4x01234567); in xnn_f32_gemminc_ukernel_5x16s4__fma3_broadcast()
D3x16s4-fma3-broadcast.c116 const __m256 vb01234567c3 = _mm256_load_ps(w + 48); in xnn_f32_gemminc_ukernel_3x16s4__fma3_broadcast() local
119 vacc0x01234567 = _mm256_fmadd_ps(va0, vb01234567c3, vacc0x01234567); in xnn_f32_gemminc_ukernel_3x16s4__fma3_broadcast()
120 vacc1x01234567 = _mm256_fmadd_ps(va1, vb01234567c3, vacc1x01234567); in xnn_f32_gemminc_ukernel_3x16s4__fma3_broadcast()
121 vacc2x01234567 = _mm256_fmadd_ps(va2, vb01234567c3, vacc2x01234567); in xnn_f32_gemminc_ukernel_3x16s4__fma3_broadcast()
D1x16s4-fma3-broadcast.c78 const __m256 vb01234567c3 = _mm256_load_ps(w + 48); in xnn_f32_gemminc_ukernel_1x16s4__fma3_broadcast() local
81 vacc0x01234567 = _mm256_fmadd_ps(va0, vb01234567c3, vacc0x01234567); in xnn_f32_gemminc_ukernel_1x16s4__fma3_broadcast()
/external/XNNPACK/src/f32-gemm/gen/
D4x16s4-fma3-broadcast.c133 const __m256 vb01234567c3 = _mm256_load_ps(w + 48); in xnn_f32_gemm_ukernel_4x16s4__fma3_broadcast() local
136 vacc0x01234567 = _mm256_fmadd_ps(va0, vb01234567c3, vacc0x01234567); in xnn_f32_gemm_ukernel_4x16s4__fma3_broadcast()
137 vacc1x01234567 = _mm256_fmadd_ps(va1, vb01234567c3, vacc1x01234567); in xnn_f32_gemm_ukernel_4x16s4__fma3_broadcast()
138 vacc2x01234567 = _mm256_fmadd_ps(va2, vb01234567c3, vacc2x01234567); in xnn_f32_gemm_ukernel_4x16s4__fma3_broadcast()
139 vacc3x01234567 = _mm256_fmadd_ps(va3, vb01234567c3, vacc3x01234567); in xnn_f32_gemm_ukernel_4x16s4__fma3_broadcast()
D5x16s4-fma3-broadcast.c152 const __m256 vb01234567c3 = _mm256_load_ps(w + 48); in xnn_f32_gemm_ukernel_5x16s4__fma3_broadcast() local
155 vacc0x01234567 = _mm256_fmadd_ps(va0, vb01234567c3, vacc0x01234567); in xnn_f32_gemm_ukernel_5x16s4__fma3_broadcast()
156 vacc1x01234567 = _mm256_fmadd_ps(va1, vb01234567c3, vacc1x01234567); in xnn_f32_gemm_ukernel_5x16s4__fma3_broadcast()
157 vacc2x01234567 = _mm256_fmadd_ps(va2, vb01234567c3, vacc2x01234567); in xnn_f32_gemm_ukernel_5x16s4__fma3_broadcast()
158 vacc3x01234567 = _mm256_fmadd_ps(va3, vb01234567c3, vacc3x01234567); in xnn_f32_gemm_ukernel_5x16s4__fma3_broadcast()
159 vacc4x01234567 = _mm256_fmadd_ps(va4, vb01234567c3, vacc4x01234567); in xnn_f32_gemm_ukernel_5x16s4__fma3_broadcast()
D3x16s4-fma3-broadcast.c114 const __m256 vb01234567c3 = _mm256_load_ps(w + 48); in xnn_f32_gemm_ukernel_3x16s4__fma3_broadcast() local
117 vacc0x01234567 = _mm256_fmadd_ps(va0, vb01234567c3, vacc0x01234567); in xnn_f32_gemm_ukernel_3x16s4__fma3_broadcast()
118 vacc1x01234567 = _mm256_fmadd_ps(va1, vb01234567c3, vacc1x01234567); in xnn_f32_gemm_ukernel_3x16s4__fma3_broadcast()
119 vacc2x01234567 = _mm256_fmadd_ps(va2, vb01234567c3, vacc2x01234567); in xnn_f32_gemm_ukernel_3x16s4__fma3_broadcast()
D1x16s4-fma3-broadcast.c76 const __m256 vb01234567c3 = _mm256_load_ps(w + 48); in xnn_f32_gemm_ukernel_1x16s4__fma3_broadcast() local
79 vacc0x01234567 = _mm256_fmadd_ps(va0, vb01234567c3, vacc0x01234567); in xnn_f32_gemm_ukernel_1x16s4__fma3_broadcast()
/external/XNNPACK/src/f32-igemm/gen/
D5x16s4-fma3-broadcast.c177 const __m256 vb01234567c3 = _mm256_load_ps(w + 48); in xnn_f32_igemm_ukernel_5x16s4__fma3_broadcast() local
180 vacc0x01234567 = _mm256_fmadd_ps(va0, vb01234567c3, vacc0x01234567); in xnn_f32_igemm_ukernel_5x16s4__fma3_broadcast()
181 vacc1x01234567 = _mm256_fmadd_ps(va1, vb01234567c3, vacc1x01234567); in xnn_f32_igemm_ukernel_5x16s4__fma3_broadcast()
182 vacc2x01234567 = _mm256_fmadd_ps(va2, vb01234567c3, vacc2x01234567); in xnn_f32_igemm_ukernel_5x16s4__fma3_broadcast()
183 vacc3x01234567 = _mm256_fmadd_ps(va3, vb01234567c3, vacc3x01234567); in xnn_f32_igemm_ukernel_5x16s4__fma3_broadcast()
184 vacc4x01234567 = _mm256_fmadd_ps(va4, vb01234567c3, vacc4x01234567); in xnn_f32_igemm_ukernel_5x16s4__fma3_broadcast()
D4x16s4-fma3-broadcast.c155 const __m256 vb01234567c3 = _mm256_load_ps(w + 48); in xnn_f32_igemm_ukernel_4x16s4__fma3_broadcast() local
158 vacc0x01234567 = _mm256_fmadd_ps(va0, vb01234567c3, vacc0x01234567); in xnn_f32_igemm_ukernel_4x16s4__fma3_broadcast()
159 vacc1x01234567 = _mm256_fmadd_ps(va1, vb01234567c3, vacc1x01234567); in xnn_f32_igemm_ukernel_4x16s4__fma3_broadcast()
160 vacc2x01234567 = _mm256_fmadd_ps(va2, vb01234567c3, vacc2x01234567); in xnn_f32_igemm_ukernel_4x16s4__fma3_broadcast()
161 vacc3x01234567 = _mm256_fmadd_ps(va3, vb01234567c3, vacc3x01234567); in xnn_f32_igemm_ukernel_4x16s4__fma3_broadcast()
D3x16s4-fma3-broadcast.c133 const __m256 vb01234567c3 = _mm256_load_ps(w + 48); in xnn_f32_igemm_ukernel_3x16s4__fma3_broadcast() local
136 vacc0x01234567 = _mm256_fmadd_ps(va0, vb01234567c3, vacc0x01234567); in xnn_f32_igemm_ukernel_3x16s4__fma3_broadcast()
137 vacc1x01234567 = _mm256_fmadd_ps(va1, vb01234567c3, vacc1x01234567); in xnn_f32_igemm_ukernel_3x16s4__fma3_broadcast()
138 vacc2x01234567 = _mm256_fmadd_ps(va2, vb01234567c3, vacc2x01234567); in xnn_f32_igemm_ukernel_3x16s4__fma3_broadcast()
D1x16s4-fma3-broadcast.c89 const __m256 vb01234567c3 = _mm256_load_ps(w + 48); in xnn_f32_igemm_ukernel_1x16s4__fma3_broadcast() local
92 vacc0x01234567 = _mm256_fmadd_ps(va0, vb01234567c3, vacc0x01234567); in xnn_f32_igemm_ukernel_1x16s4__fma3_broadcast()
/external/XNNPACK/src/q8-gemm/
D4x8-neon.c114 const uint8x8_t vb01234567c3 = vld1_u8(w); w = (const void*) ((uintptr_t) w + 8); in xnn_q8_gemm_ukernel_4x8__neon() local
115 const int16x8_t vxb01234567c3 = vreinterpretq_s16_u16(vsubl_u8(vb01234567c3, vb_zero_point)); in xnn_q8_gemm_ukernel_4x8__neon()
225 const uint8x8_t vb01234567c3 = vld1_u8(w); w = (const void*) ((uintptr_t) w + 8); in xnn_q8_gemm_ukernel_4x8__neon() local
226 … const int16x8_t vxb01234567c3 = vreinterpretq_s16_u16(vsubl_u8(vb01234567c3, vb_zero_point)); in xnn_q8_gemm_ukernel_4x8__neon()
D8x8-neon.c178 const uint8x8_t vb01234567c3 = vld1_u8(w); w = (const void*) ((uintptr_t) w + 8); in xnn_q8_gemm_ukernel_8x8__neon() local
179 const int16x8_t vxb01234567c3 = vreinterpretq_s16_u16(vsubl_u8(vb01234567c3, vb_zero_point)); in xnn_q8_gemm_ukernel_8x8__neon()
361 const uint8x8_t vb01234567c3 = vld1_u8(w); w = (const void*) ((uintptr_t) w + 8); in xnn_q8_gemm_ukernel_8x8__neon() local
362 … const int16x8_t vxb01234567c3 = vreinterpretq_s16_u16(vsubl_u8(vb01234567c3, vb_zero_point)); in xnn_q8_gemm_ukernel_8x8__neon()