Home
last modified time | relevance | path

Searched refs:vb4567c3 (Results 1 – 25 of 486) sorted by relevance

12345678910>>...20

/external/XNNPACK/src/f32-igemm/gen/
D5x8s4-wasmrelaxedsimd-fma.c178 const v128_t vb4567c3 = wasm_v128_load(w + 28); in xnn_f32_igemm_ukernel_5x8s4__wasmrelaxedsimd_fma() local
185 vacc0x4567 = __builtin_wasm_fma_f32x4(vacc0x4567, va0, vb4567c3); in xnn_f32_igemm_ukernel_5x8s4__wasmrelaxedsimd_fma()
186 vacc1x4567 = __builtin_wasm_fma_f32x4(vacc1x4567, va1, vb4567c3); in xnn_f32_igemm_ukernel_5x8s4__wasmrelaxedsimd_fma()
187 vacc2x4567 = __builtin_wasm_fma_f32x4(vacc2x4567, va2, vb4567c3); in xnn_f32_igemm_ukernel_5x8s4__wasmrelaxedsimd_fma()
188 vacc3x4567 = __builtin_wasm_fma_f32x4(vacc3x4567, va3, vb4567c3); in xnn_f32_igemm_ukernel_5x8s4__wasmrelaxedsimd_fma()
189 vacc4x4567 = __builtin_wasm_fma_f32x4(vacc4x4567, va4, vb4567c3); in xnn_f32_igemm_ukernel_5x8s4__wasmrelaxedsimd_fma()
270 const v128_t vb4567c3 = wasm_v128_load(w + 28); in xnn_f32_igemm_ukernel_5x8s4__wasmrelaxedsimd_fma() local
277 …uiltin_wasm_fma_f32x4(vacc0x4567, wasm_v128_andnot(va0, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3); in xnn_f32_igemm_ukernel_5x8s4__wasmrelaxedsimd_fma()
278 …uiltin_wasm_fma_f32x4(vacc1x4567, wasm_v128_andnot(va1, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3); in xnn_f32_igemm_ukernel_5x8s4__wasmrelaxedsimd_fma()
279 …uiltin_wasm_fma_f32x4(vacc2x4567, wasm_v128_andnot(va2, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3); in xnn_f32_igemm_ukernel_5x8s4__wasmrelaxedsimd_fma()
[all …]
D6x8s4-wasmrelaxedsimd-fma.c200 const v128_t vb4567c3 = wasm_v128_load(w + 28); in xnn_f32_igemm_ukernel_6x8s4__wasmrelaxedsimd_fma() local
208 vacc0x4567 = __builtin_wasm_fma_f32x4(vacc0x4567, va0, vb4567c3); in xnn_f32_igemm_ukernel_6x8s4__wasmrelaxedsimd_fma()
209 vacc1x4567 = __builtin_wasm_fma_f32x4(vacc1x4567, va1, vb4567c3); in xnn_f32_igemm_ukernel_6x8s4__wasmrelaxedsimd_fma()
210 vacc2x4567 = __builtin_wasm_fma_f32x4(vacc2x4567, va2, vb4567c3); in xnn_f32_igemm_ukernel_6x8s4__wasmrelaxedsimd_fma()
211 vacc3x4567 = __builtin_wasm_fma_f32x4(vacc3x4567, va3, vb4567c3); in xnn_f32_igemm_ukernel_6x8s4__wasmrelaxedsimd_fma()
212 vacc4x4567 = __builtin_wasm_fma_f32x4(vacc4x4567, va4, vb4567c3); in xnn_f32_igemm_ukernel_6x8s4__wasmrelaxedsimd_fma()
213 vacc5x4567 = __builtin_wasm_fma_f32x4(vacc5x4567, va5, vb4567c3); in xnn_f32_igemm_ukernel_6x8s4__wasmrelaxedsimd_fma()
305 const v128_t vb4567c3 = wasm_v128_load(w + 28); in xnn_f32_igemm_ukernel_6x8s4__wasmrelaxedsimd_fma() local
313 …uiltin_wasm_fma_f32x4(vacc0x4567, wasm_v128_andnot(va0, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3); in xnn_f32_igemm_ukernel_6x8s4__wasmrelaxedsimd_fma()
314 …uiltin_wasm_fma_f32x4(vacc1x4567, wasm_v128_andnot(va1, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3); in xnn_f32_igemm_ukernel_6x8s4__wasmrelaxedsimd_fma()
[all …]
D6x8s4-relu-wasmrelaxedsimd-fma.c200 const v128_t vb4567c3 = wasm_v128_load(w + 28); in xnn_f32_igemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma() local
208 vacc0x4567 = __builtin_wasm_fma_f32x4(vacc0x4567, va0, vb4567c3); in xnn_f32_igemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma()
209 vacc1x4567 = __builtin_wasm_fma_f32x4(vacc1x4567, va1, vb4567c3); in xnn_f32_igemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma()
210 vacc2x4567 = __builtin_wasm_fma_f32x4(vacc2x4567, va2, vb4567c3); in xnn_f32_igemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma()
211 vacc3x4567 = __builtin_wasm_fma_f32x4(vacc3x4567, va3, vb4567c3); in xnn_f32_igemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma()
212 vacc4x4567 = __builtin_wasm_fma_f32x4(vacc4x4567, va4, vb4567c3); in xnn_f32_igemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma()
213 vacc5x4567 = __builtin_wasm_fma_f32x4(vacc5x4567, va5, vb4567c3); in xnn_f32_igemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma()
305 const v128_t vb4567c3 = wasm_v128_load(w + 28); in xnn_f32_igemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma() local
313 …uiltin_wasm_fma_f32x4(vacc0x4567, wasm_v128_andnot(va0, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3); in xnn_f32_igemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma()
314 …uiltin_wasm_fma_f32x4(vacc1x4567, wasm_v128_andnot(va1, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3); in xnn_f32_igemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma()
[all …]
D4x8s4-wasmrelaxedsimd-fma.c156 const v128_t vb4567c3 = wasm_v128_load(w + 28); in xnn_f32_igemm_ukernel_4x8s4__wasmrelaxedsimd_fma() local
162 vacc0x4567 = __builtin_wasm_fma_f32x4(vacc0x4567, va0, vb4567c3); in xnn_f32_igemm_ukernel_4x8s4__wasmrelaxedsimd_fma()
163 vacc1x4567 = __builtin_wasm_fma_f32x4(vacc1x4567, va1, vb4567c3); in xnn_f32_igemm_ukernel_4x8s4__wasmrelaxedsimd_fma()
164 vacc2x4567 = __builtin_wasm_fma_f32x4(vacc2x4567, va2, vb4567c3); in xnn_f32_igemm_ukernel_4x8s4__wasmrelaxedsimd_fma()
165 vacc3x4567 = __builtin_wasm_fma_f32x4(vacc3x4567, va3, vb4567c3); in xnn_f32_igemm_ukernel_4x8s4__wasmrelaxedsimd_fma()
235 const v128_t vb4567c3 = wasm_v128_load(w + 28); in xnn_f32_igemm_ukernel_4x8s4__wasmrelaxedsimd_fma() local
241 …uiltin_wasm_fma_f32x4(vacc0x4567, wasm_v128_andnot(va0, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3); in xnn_f32_igemm_ukernel_4x8s4__wasmrelaxedsimd_fma()
242 …uiltin_wasm_fma_f32x4(vacc1x4567, wasm_v128_andnot(va1, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3); in xnn_f32_igemm_ukernel_4x8s4__wasmrelaxedsimd_fma()
243 …uiltin_wasm_fma_f32x4(vacc2x4567, wasm_v128_andnot(va2, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3); in xnn_f32_igemm_ukernel_4x8s4__wasmrelaxedsimd_fma()
244 …uiltin_wasm_fma_f32x4(vacc3x4567, wasm_v128_andnot(va3, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3); in xnn_f32_igemm_ukernel_4x8s4__wasmrelaxedsimd_fma()
D4x8s4-relu-wasmrelaxedsimd-fma.c156 const v128_t vb4567c3 = wasm_v128_load(w + 28); in xnn_f32_igemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma() local
162 vacc0x4567 = __builtin_wasm_fma_f32x4(vacc0x4567, va0, vb4567c3); in xnn_f32_igemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma()
163 vacc1x4567 = __builtin_wasm_fma_f32x4(vacc1x4567, va1, vb4567c3); in xnn_f32_igemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma()
164 vacc2x4567 = __builtin_wasm_fma_f32x4(vacc2x4567, va2, vb4567c3); in xnn_f32_igemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma()
165 vacc3x4567 = __builtin_wasm_fma_f32x4(vacc3x4567, va3, vb4567c3); in xnn_f32_igemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma()
235 const v128_t vb4567c3 = wasm_v128_load(w + 28); in xnn_f32_igemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma() local
241 …uiltin_wasm_fma_f32x4(vacc0x4567, wasm_v128_andnot(va0, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3); in xnn_f32_igemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma()
242 …uiltin_wasm_fma_f32x4(vacc1x4567, wasm_v128_andnot(va1, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3); in xnn_f32_igemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma()
243 …uiltin_wasm_fma_f32x4(vacc2x4567, wasm_v128_andnot(va2, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3); in xnn_f32_igemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma()
244 …uiltin_wasm_fma_f32x4(vacc3x4567, wasm_v128_andnot(va3, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3); in xnn_f32_igemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma()
D5x8s4-minmax-wasmrelaxedsimd-fma.c180 const v128_t vb4567c3 = wasm_v128_load(w + 28); in xnn_f32_igemm_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma() local
187 vacc0x4567 = __builtin_wasm_fma_f32x4(vacc0x4567, va0, vb4567c3); in xnn_f32_igemm_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma()
188 vacc1x4567 = __builtin_wasm_fma_f32x4(vacc1x4567, va1, vb4567c3); in xnn_f32_igemm_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma()
189 vacc2x4567 = __builtin_wasm_fma_f32x4(vacc2x4567, va2, vb4567c3); in xnn_f32_igemm_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma()
190 vacc3x4567 = __builtin_wasm_fma_f32x4(vacc3x4567, va3, vb4567c3); in xnn_f32_igemm_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma()
191 vacc4x4567 = __builtin_wasm_fma_f32x4(vacc4x4567, va4, vb4567c3); in xnn_f32_igemm_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma()
272 const v128_t vb4567c3 = wasm_v128_load(w + 28); in xnn_f32_igemm_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma() local
279 …uiltin_wasm_fma_f32x4(vacc0x4567, wasm_v128_andnot(va0, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3); in xnn_f32_igemm_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma()
280 …uiltin_wasm_fma_f32x4(vacc1x4567, wasm_v128_andnot(va1, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3); in xnn_f32_igemm_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma()
281 …uiltin_wasm_fma_f32x4(vacc2x4567, wasm_v128_andnot(va2, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3); in xnn_f32_igemm_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma()
[all …]
D5x8s4-relu-wasmsimd.c178 const v128_t vb4567c3 = wasm_v128_load(w + 28); in xnn_f32_igemm_relu_ukernel_5x8s4__wasmsimd() local
185 vacc0x4567 = wasm_f32x4_add(vacc0x4567, wasm_f32x4_mul(va0, vb4567c3)); in xnn_f32_igemm_relu_ukernel_5x8s4__wasmsimd()
186 vacc1x4567 = wasm_f32x4_add(vacc1x4567, wasm_f32x4_mul(va1, vb4567c3)); in xnn_f32_igemm_relu_ukernel_5x8s4__wasmsimd()
187 vacc2x4567 = wasm_f32x4_add(vacc2x4567, wasm_f32x4_mul(va2, vb4567c3)); in xnn_f32_igemm_relu_ukernel_5x8s4__wasmsimd()
188 vacc3x4567 = wasm_f32x4_add(vacc3x4567, wasm_f32x4_mul(va3, vb4567c3)); in xnn_f32_igemm_relu_ukernel_5x8s4__wasmsimd()
189 vacc4x4567 = wasm_f32x4_add(vacc4x4567, wasm_f32x4_mul(va4, vb4567c3)); in xnn_f32_igemm_relu_ukernel_5x8s4__wasmsimd()
270 const v128_t vb4567c3 = wasm_v128_load(w + 28); in xnn_f32_igemm_relu_ukernel_5x8s4__wasmsimd() local
277 …4_add(vacc0x4567, wasm_f32x4_mul(wasm_v128_andnot(va0, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3)); in xnn_f32_igemm_relu_ukernel_5x8s4__wasmsimd()
278 …4_add(vacc1x4567, wasm_f32x4_mul(wasm_v128_andnot(va1, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3)); in xnn_f32_igemm_relu_ukernel_5x8s4__wasmsimd()
279 …4_add(vacc2x4567, wasm_f32x4_mul(wasm_v128_andnot(va2, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3)); in xnn_f32_igemm_relu_ukernel_5x8s4__wasmsimd()
[all …]
D5x8s4-wasmsimd.c178 const v128_t vb4567c3 = wasm_v128_load(w + 28); in xnn_f32_igemm_ukernel_5x8s4__wasmsimd() local
185 vacc0x4567 = wasm_f32x4_add(vacc0x4567, wasm_f32x4_mul(va0, vb4567c3)); in xnn_f32_igemm_ukernel_5x8s4__wasmsimd()
186 vacc1x4567 = wasm_f32x4_add(vacc1x4567, wasm_f32x4_mul(va1, vb4567c3)); in xnn_f32_igemm_ukernel_5x8s4__wasmsimd()
187 vacc2x4567 = wasm_f32x4_add(vacc2x4567, wasm_f32x4_mul(va2, vb4567c3)); in xnn_f32_igemm_ukernel_5x8s4__wasmsimd()
188 vacc3x4567 = wasm_f32x4_add(vacc3x4567, wasm_f32x4_mul(va3, vb4567c3)); in xnn_f32_igemm_ukernel_5x8s4__wasmsimd()
189 vacc4x4567 = wasm_f32x4_add(vacc4x4567, wasm_f32x4_mul(va4, vb4567c3)); in xnn_f32_igemm_ukernel_5x8s4__wasmsimd()
270 const v128_t vb4567c3 = wasm_v128_load(w + 28); in xnn_f32_igemm_ukernel_5x8s4__wasmsimd() local
277 …4_add(vacc0x4567, wasm_f32x4_mul(wasm_v128_andnot(va0, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3)); in xnn_f32_igemm_ukernel_5x8s4__wasmsimd()
278 …4_add(vacc1x4567, wasm_f32x4_mul(wasm_v128_andnot(va1, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3)); in xnn_f32_igemm_ukernel_5x8s4__wasmsimd()
279 …4_add(vacc2x4567, wasm_f32x4_mul(wasm_v128_andnot(va2, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3)); in xnn_f32_igemm_ukernel_5x8s4__wasmsimd()
[all …]
D5x8s4-relu-wasmrelaxedsimd-fma.c178 const v128_t vb4567c3 = wasm_v128_load(w + 28); in xnn_f32_igemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma() local
185 vacc0x4567 = __builtin_wasm_fma_f32x4(vacc0x4567, va0, vb4567c3); in xnn_f32_igemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma()
186 vacc1x4567 = __builtin_wasm_fma_f32x4(vacc1x4567, va1, vb4567c3); in xnn_f32_igemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma()
187 vacc2x4567 = __builtin_wasm_fma_f32x4(vacc2x4567, va2, vb4567c3); in xnn_f32_igemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma()
188 vacc3x4567 = __builtin_wasm_fma_f32x4(vacc3x4567, va3, vb4567c3); in xnn_f32_igemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma()
189 vacc4x4567 = __builtin_wasm_fma_f32x4(vacc4x4567, va4, vb4567c3); in xnn_f32_igemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma()
270 const v128_t vb4567c3 = wasm_v128_load(w + 28); in xnn_f32_igemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma() local
277 …uiltin_wasm_fma_f32x4(vacc0x4567, wasm_v128_andnot(va0, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3); in xnn_f32_igemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma()
278 …uiltin_wasm_fma_f32x4(vacc1x4567, wasm_v128_andnot(va1, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3); in xnn_f32_igemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma()
279 …uiltin_wasm_fma_f32x4(vacc2x4567, wasm_v128_andnot(va2, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3); in xnn_f32_igemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma()
[all …]
/external/XNNPACK/src/f32-gemm/gen/
D5x8s4-wasmrelaxedsimd-fma.c153 const v128_t vb4567c3 = wasm_v128_load(w + 28); in xnn_f32_gemm_ukernel_5x8s4__wasmrelaxedsimd_fma() local
160 vacc0x4567 = __builtin_wasm_fma_f32x4(vacc0x4567, va0, vb4567c3); in xnn_f32_gemm_ukernel_5x8s4__wasmrelaxedsimd_fma()
161 vacc1x4567 = __builtin_wasm_fma_f32x4(vacc1x4567, va1, vb4567c3); in xnn_f32_gemm_ukernel_5x8s4__wasmrelaxedsimd_fma()
162 vacc2x4567 = __builtin_wasm_fma_f32x4(vacc2x4567, va2, vb4567c3); in xnn_f32_gemm_ukernel_5x8s4__wasmrelaxedsimd_fma()
163 vacc3x4567 = __builtin_wasm_fma_f32x4(vacc3x4567, va3, vb4567c3); in xnn_f32_gemm_ukernel_5x8s4__wasmrelaxedsimd_fma()
164 vacc4x4567 = __builtin_wasm_fma_f32x4(vacc4x4567, va4, vb4567c3); in xnn_f32_gemm_ukernel_5x8s4__wasmrelaxedsimd_fma()
245 const v128_t vb4567c3 = wasm_v128_load(w + 28); in xnn_f32_gemm_ukernel_5x8s4__wasmrelaxedsimd_fma() local
252 …uiltin_wasm_fma_f32x4(vacc0x4567, wasm_v128_andnot(va0, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3); in xnn_f32_gemm_ukernel_5x8s4__wasmrelaxedsimd_fma()
253 …uiltin_wasm_fma_f32x4(vacc1x4567, wasm_v128_andnot(va1, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3); in xnn_f32_gemm_ukernel_5x8s4__wasmrelaxedsimd_fma()
254 …uiltin_wasm_fma_f32x4(vacc2x4567, wasm_v128_andnot(va2, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3); in xnn_f32_gemm_ukernel_5x8s4__wasmrelaxedsimd_fma()
[all …]
D5x8s4-relu-wasmrelaxedsimd-fma.c153 const v128_t vb4567c3 = wasm_v128_load(w + 28); in xnn_f32_gemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma() local
160 vacc0x4567 = __builtin_wasm_fma_f32x4(vacc0x4567, va0, vb4567c3); in xnn_f32_gemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma()
161 vacc1x4567 = __builtin_wasm_fma_f32x4(vacc1x4567, va1, vb4567c3); in xnn_f32_gemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma()
162 vacc2x4567 = __builtin_wasm_fma_f32x4(vacc2x4567, va2, vb4567c3); in xnn_f32_gemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma()
163 vacc3x4567 = __builtin_wasm_fma_f32x4(vacc3x4567, va3, vb4567c3); in xnn_f32_gemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma()
164 vacc4x4567 = __builtin_wasm_fma_f32x4(vacc4x4567, va4, vb4567c3); in xnn_f32_gemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma()
245 const v128_t vb4567c3 = wasm_v128_load(w + 28); in xnn_f32_gemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma() local
252 …uiltin_wasm_fma_f32x4(vacc0x4567, wasm_v128_andnot(va0, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3); in xnn_f32_gemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma()
253 …uiltin_wasm_fma_f32x4(vacc1x4567, wasm_v128_andnot(va1, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3); in xnn_f32_gemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma()
254 …uiltin_wasm_fma_f32x4(vacc2x4567, wasm_v128_andnot(va2, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3); in xnn_f32_gemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma()
[all …]
D6x8s4-relu-wasmrelaxedsimd-fma.c172 const v128_t vb4567c3 = wasm_v128_load(w + 28); in xnn_f32_gemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma() local
180 vacc0x4567 = __builtin_wasm_fma_f32x4(vacc0x4567, va0, vb4567c3); in xnn_f32_gemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma()
181 vacc1x4567 = __builtin_wasm_fma_f32x4(vacc1x4567, va1, vb4567c3); in xnn_f32_gemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma()
182 vacc2x4567 = __builtin_wasm_fma_f32x4(vacc2x4567, va2, vb4567c3); in xnn_f32_gemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma()
183 vacc3x4567 = __builtin_wasm_fma_f32x4(vacc3x4567, va3, vb4567c3); in xnn_f32_gemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma()
184 vacc4x4567 = __builtin_wasm_fma_f32x4(vacc4x4567, va4, vb4567c3); in xnn_f32_gemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma()
185 vacc5x4567 = __builtin_wasm_fma_f32x4(vacc5x4567, va5, vb4567c3); in xnn_f32_gemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma()
277 const v128_t vb4567c3 = wasm_v128_load(w + 28); in xnn_f32_gemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma() local
285 …uiltin_wasm_fma_f32x4(vacc0x4567, wasm_v128_andnot(va0, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3); in xnn_f32_gemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma()
286 …uiltin_wasm_fma_f32x4(vacc1x4567, wasm_v128_andnot(va1, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3); in xnn_f32_gemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma()
[all …]
D6x8s4-wasmsimd.c172 const v128_t vb4567c3 = wasm_v128_load(w + 28); in xnn_f32_gemm_ukernel_6x8s4__wasmsimd() local
180 vacc0x4567 = wasm_f32x4_add(vacc0x4567, wasm_f32x4_mul(va0, vb4567c3)); in xnn_f32_gemm_ukernel_6x8s4__wasmsimd()
181 vacc1x4567 = wasm_f32x4_add(vacc1x4567, wasm_f32x4_mul(va1, vb4567c3)); in xnn_f32_gemm_ukernel_6x8s4__wasmsimd()
182 vacc2x4567 = wasm_f32x4_add(vacc2x4567, wasm_f32x4_mul(va2, vb4567c3)); in xnn_f32_gemm_ukernel_6x8s4__wasmsimd()
183 vacc3x4567 = wasm_f32x4_add(vacc3x4567, wasm_f32x4_mul(va3, vb4567c3)); in xnn_f32_gemm_ukernel_6x8s4__wasmsimd()
184 vacc4x4567 = wasm_f32x4_add(vacc4x4567, wasm_f32x4_mul(va4, vb4567c3)); in xnn_f32_gemm_ukernel_6x8s4__wasmsimd()
185 vacc5x4567 = wasm_f32x4_add(vacc5x4567, wasm_f32x4_mul(va5, vb4567c3)); in xnn_f32_gemm_ukernel_6x8s4__wasmsimd()
277 const v128_t vb4567c3 = wasm_v128_load(w + 28); in xnn_f32_gemm_ukernel_6x8s4__wasmsimd() local
285 …4_add(vacc0x4567, wasm_f32x4_mul(wasm_v128_andnot(va0, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3)); in xnn_f32_gemm_ukernel_6x8s4__wasmsimd()
286 …4_add(vacc1x4567, wasm_f32x4_mul(wasm_v128_andnot(va1, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3)); in xnn_f32_gemm_ukernel_6x8s4__wasmsimd()
[all …]
D6x8s4-wasmrelaxedsimd-fma.c172 const v128_t vb4567c3 = wasm_v128_load(w + 28); in xnn_f32_gemm_ukernel_6x8s4__wasmrelaxedsimd_fma() local
180 vacc0x4567 = __builtin_wasm_fma_f32x4(vacc0x4567, va0, vb4567c3); in xnn_f32_gemm_ukernel_6x8s4__wasmrelaxedsimd_fma()
181 vacc1x4567 = __builtin_wasm_fma_f32x4(vacc1x4567, va1, vb4567c3); in xnn_f32_gemm_ukernel_6x8s4__wasmrelaxedsimd_fma()
182 vacc2x4567 = __builtin_wasm_fma_f32x4(vacc2x4567, va2, vb4567c3); in xnn_f32_gemm_ukernel_6x8s4__wasmrelaxedsimd_fma()
183 vacc3x4567 = __builtin_wasm_fma_f32x4(vacc3x4567, va3, vb4567c3); in xnn_f32_gemm_ukernel_6x8s4__wasmrelaxedsimd_fma()
184 vacc4x4567 = __builtin_wasm_fma_f32x4(vacc4x4567, va4, vb4567c3); in xnn_f32_gemm_ukernel_6x8s4__wasmrelaxedsimd_fma()
185 vacc5x4567 = __builtin_wasm_fma_f32x4(vacc5x4567, va5, vb4567c3); in xnn_f32_gemm_ukernel_6x8s4__wasmrelaxedsimd_fma()
277 const v128_t vb4567c3 = wasm_v128_load(w + 28); in xnn_f32_gemm_ukernel_6x8s4__wasmrelaxedsimd_fma() local
285 …uiltin_wasm_fma_f32x4(vacc0x4567, wasm_v128_andnot(va0, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3); in xnn_f32_gemm_ukernel_6x8s4__wasmrelaxedsimd_fma()
286 …uiltin_wasm_fma_f32x4(vacc1x4567, wasm_v128_andnot(va1, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3); in xnn_f32_gemm_ukernel_6x8s4__wasmrelaxedsimd_fma()
[all …]
D6x8s4-minmax-wasmrelaxedsimd-fma.c174 const v128_t vb4567c3 = wasm_v128_load(w + 28); in xnn_f32_gemm_minmax_ukernel_6x8s4__wasmrelaxedsimd_fma() local
182 vacc0x4567 = __builtin_wasm_fma_f32x4(vacc0x4567, va0, vb4567c3); in xnn_f32_gemm_minmax_ukernel_6x8s4__wasmrelaxedsimd_fma()
183 vacc1x4567 = __builtin_wasm_fma_f32x4(vacc1x4567, va1, vb4567c3); in xnn_f32_gemm_minmax_ukernel_6x8s4__wasmrelaxedsimd_fma()
184 vacc2x4567 = __builtin_wasm_fma_f32x4(vacc2x4567, va2, vb4567c3); in xnn_f32_gemm_minmax_ukernel_6x8s4__wasmrelaxedsimd_fma()
185 vacc3x4567 = __builtin_wasm_fma_f32x4(vacc3x4567, va3, vb4567c3); in xnn_f32_gemm_minmax_ukernel_6x8s4__wasmrelaxedsimd_fma()
186 vacc4x4567 = __builtin_wasm_fma_f32x4(vacc4x4567, va4, vb4567c3); in xnn_f32_gemm_minmax_ukernel_6x8s4__wasmrelaxedsimd_fma()
187 vacc5x4567 = __builtin_wasm_fma_f32x4(vacc5x4567, va5, vb4567c3); in xnn_f32_gemm_minmax_ukernel_6x8s4__wasmrelaxedsimd_fma()
279 const v128_t vb4567c3 = wasm_v128_load(w + 28); in xnn_f32_gemm_minmax_ukernel_6x8s4__wasmrelaxedsimd_fma() local
287 …uiltin_wasm_fma_f32x4(vacc0x4567, wasm_v128_andnot(va0, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3); in xnn_f32_gemm_minmax_ukernel_6x8s4__wasmrelaxedsimd_fma()
288 …uiltin_wasm_fma_f32x4(vacc1x4567, wasm_v128_andnot(va1, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3); in xnn_f32_gemm_minmax_ukernel_6x8s4__wasmrelaxedsimd_fma()
[all …]
D4x8s4-wasmrelaxedsimd-fma.c134 const v128_t vb4567c3 = wasm_v128_load(w + 28); in xnn_f32_gemm_ukernel_4x8s4__wasmrelaxedsimd_fma() local
140 vacc0x4567 = __builtin_wasm_fma_f32x4(vacc0x4567, va0, vb4567c3); in xnn_f32_gemm_ukernel_4x8s4__wasmrelaxedsimd_fma()
141 vacc1x4567 = __builtin_wasm_fma_f32x4(vacc1x4567, va1, vb4567c3); in xnn_f32_gemm_ukernel_4x8s4__wasmrelaxedsimd_fma()
142 vacc2x4567 = __builtin_wasm_fma_f32x4(vacc2x4567, va2, vb4567c3); in xnn_f32_gemm_ukernel_4x8s4__wasmrelaxedsimd_fma()
143 vacc3x4567 = __builtin_wasm_fma_f32x4(vacc3x4567, va3, vb4567c3); in xnn_f32_gemm_ukernel_4x8s4__wasmrelaxedsimd_fma()
213 const v128_t vb4567c3 = wasm_v128_load(w + 28); in xnn_f32_gemm_ukernel_4x8s4__wasmrelaxedsimd_fma() local
219 …uiltin_wasm_fma_f32x4(vacc0x4567, wasm_v128_andnot(va0, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3); in xnn_f32_gemm_ukernel_4x8s4__wasmrelaxedsimd_fma()
220 …uiltin_wasm_fma_f32x4(vacc1x4567, wasm_v128_andnot(va1, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3); in xnn_f32_gemm_ukernel_4x8s4__wasmrelaxedsimd_fma()
221 …uiltin_wasm_fma_f32x4(vacc2x4567, wasm_v128_andnot(va2, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3); in xnn_f32_gemm_ukernel_4x8s4__wasmrelaxedsimd_fma()
222 …uiltin_wasm_fma_f32x4(vacc3x4567, wasm_v128_andnot(va3, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3); in xnn_f32_gemm_ukernel_4x8s4__wasmrelaxedsimd_fma()
D3x8s4-wasmrelaxedsimd-fma.c115 const v128_t vb4567c3 = wasm_v128_load(w + 28); in xnn_f32_gemm_ukernel_3x8s4__wasmrelaxedsimd_fma() local
120 vacc0x4567 = __builtin_wasm_fma_f32x4(vacc0x4567, va0, vb4567c3); in xnn_f32_gemm_ukernel_3x8s4__wasmrelaxedsimd_fma()
121 vacc1x4567 = __builtin_wasm_fma_f32x4(vacc1x4567, va1, vb4567c3); in xnn_f32_gemm_ukernel_3x8s4__wasmrelaxedsimd_fma()
122 vacc2x4567 = __builtin_wasm_fma_f32x4(vacc2x4567, va2, vb4567c3); in xnn_f32_gemm_ukernel_3x8s4__wasmrelaxedsimd_fma()
181 const v128_t vb4567c3 = wasm_v128_load(w + 28); in xnn_f32_gemm_ukernel_3x8s4__wasmrelaxedsimd_fma() local
186 …uiltin_wasm_fma_f32x4(vacc0x4567, wasm_v128_andnot(va0, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3); in xnn_f32_gemm_ukernel_3x8s4__wasmrelaxedsimd_fma()
187 …uiltin_wasm_fma_f32x4(vacc1x4567, wasm_v128_andnot(va1, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3); in xnn_f32_gemm_ukernel_3x8s4__wasmrelaxedsimd_fma()
188 …uiltin_wasm_fma_f32x4(vacc2x4567, wasm_v128_andnot(va2, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3); in xnn_f32_gemm_ukernel_3x8s4__wasmrelaxedsimd_fma()
D4x8s4-wasmsimd.c134 const v128_t vb4567c3 = wasm_v128_load(w + 28); in xnn_f32_gemm_ukernel_4x8s4__wasmsimd() local
140 vacc0x4567 = wasm_f32x4_add(vacc0x4567, wasm_f32x4_mul(va0, vb4567c3)); in xnn_f32_gemm_ukernel_4x8s4__wasmsimd()
141 vacc1x4567 = wasm_f32x4_add(vacc1x4567, wasm_f32x4_mul(va1, vb4567c3)); in xnn_f32_gemm_ukernel_4x8s4__wasmsimd()
142 vacc2x4567 = wasm_f32x4_add(vacc2x4567, wasm_f32x4_mul(va2, vb4567c3)); in xnn_f32_gemm_ukernel_4x8s4__wasmsimd()
143 vacc3x4567 = wasm_f32x4_add(vacc3x4567, wasm_f32x4_mul(va3, vb4567c3)); in xnn_f32_gemm_ukernel_4x8s4__wasmsimd()
213 const v128_t vb4567c3 = wasm_v128_load(w + 28); in xnn_f32_gemm_ukernel_4x8s4__wasmsimd() local
219 …4_add(vacc0x4567, wasm_f32x4_mul(wasm_v128_andnot(va0, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3)); in xnn_f32_gemm_ukernel_4x8s4__wasmsimd()
220 …4_add(vacc1x4567, wasm_f32x4_mul(wasm_v128_andnot(va1, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3)); in xnn_f32_gemm_ukernel_4x8s4__wasmsimd()
221 …4_add(vacc2x4567, wasm_f32x4_mul(wasm_v128_andnot(va2, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3)); in xnn_f32_gemm_ukernel_4x8s4__wasmsimd()
222 …4_add(vacc3x4567, wasm_f32x4_mul(wasm_v128_andnot(va3, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3)); in xnn_f32_gemm_ukernel_4x8s4__wasmsimd()
D4x8s4-relu-wasmrelaxedsimd-fma.c134 const v128_t vb4567c3 = wasm_v128_load(w + 28); in xnn_f32_gemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma() local
140 vacc0x4567 = __builtin_wasm_fma_f32x4(vacc0x4567, va0, vb4567c3); in xnn_f32_gemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma()
141 vacc1x4567 = __builtin_wasm_fma_f32x4(vacc1x4567, va1, vb4567c3); in xnn_f32_gemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma()
142 vacc2x4567 = __builtin_wasm_fma_f32x4(vacc2x4567, va2, vb4567c3); in xnn_f32_gemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma()
143 vacc3x4567 = __builtin_wasm_fma_f32x4(vacc3x4567, va3, vb4567c3); in xnn_f32_gemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma()
213 const v128_t vb4567c3 = wasm_v128_load(w + 28); in xnn_f32_gemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma() local
219 …uiltin_wasm_fma_f32x4(vacc0x4567, wasm_v128_andnot(va0, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3); in xnn_f32_gemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma()
220 …uiltin_wasm_fma_f32x4(vacc1x4567, wasm_v128_andnot(va1, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3); in xnn_f32_gemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma()
221 …uiltin_wasm_fma_f32x4(vacc2x4567, wasm_v128_andnot(va2, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3); in xnn_f32_gemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma()
222 …uiltin_wasm_fma_f32x4(vacc3x4567, wasm_v128_andnot(va3, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3); in xnn_f32_gemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma()
D5x8s4-minmax-wasmrelaxedsimd.c155 const v128_t vb4567c3 = wasm_v128_load(w + 28); in xnn_f32_gemm_minmax_ukernel_5x8s4__wasmrelaxedsimd() local
162 vacc0x4567 = wasm_f32x4_add(vacc0x4567, wasm_f32x4_mul(va0, vb4567c3)); in xnn_f32_gemm_minmax_ukernel_5x8s4__wasmrelaxedsimd()
163 vacc1x4567 = wasm_f32x4_add(vacc1x4567, wasm_f32x4_mul(va1, vb4567c3)); in xnn_f32_gemm_minmax_ukernel_5x8s4__wasmrelaxedsimd()
164 vacc2x4567 = wasm_f32x4_add(vacc2x4567, wasm_f32x4_mul(va2, vb4567c3)); in xnn_f32_gemm_minmax_ukernel_5x8s4__wasmrelaxedsimd()
165 vacc3x4567 = wasm_f32x4_add(vacc3x4567, wasm_f32x4_mul(va3, vb4567c3)); in xnn_f32_gemm_minmax_ukernel_5x8s4__wasmrelaxedsimd()
166 vacc4x4567 = wasm_f32x4_add(vacc4x4567, wasm_f32x4_mul(va4, vb4567c3)); in xnn_f32_gemm_minmax_ukernel_5x8s4__wasmrelaxedsimd()
247 const v128_t vb4567c3 = wasm_v128_load(w + 28); in xnn_f32_gemm_minmax_ukernel_5x8s4__wasmrelaxedsimd() local
254 …4_add(vacc0x4567, wasm_f32x4_mul(wasm_v128_andnot(va0, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3)); in xnn_f32_gemm_minmax_ukernel_5x8s4__wasmrelaxedsimd()
255 …4_add(vacc1x4567, wasm_f32x4_mul(wasm_v128_andnot(va1, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3)); in xnn_f32_gemm_minmax_ukernel_5x8s4__wasmrelaxedsimd()
256 …4_add(vacc2x4567, wasm_f32x4_mul(wasm_v128_andnot(va2, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3)); in xnn_f32_gemm_minmax_ukernel_5x8s4__wasmrelaxedsimd()
[all …]
D5x8s4-minmax-wasmsimd-arm.c155 const v128_t vb4567c3 = wasm_v128_load(w + 28); in xnn_f32_gemm_minmax_ukernel_5x8s4__wasmsimd_arm() local
162 vacc0x4567 = wasm_f32x4_add(vacc0x4567, wasm_f32x4_mul(va0, vb4567c3)); in xnn_f32_gemm_minmax_ukernel_5x8s4__wasmsimd_arm()
163 vacc1x4567 = wasm_f32x4_add(vacc1x4567, wasm_f32x4_mul(va1, vb4567c3)); in xnn_f32_gemm_minmax_ukernel_5x8s4__wasmsimd_arm()
164 vacc2x4567 = wasm_f32x4_add(vacc2x4567, wasm_f32x4_mul(va2, vb4567c3)); in xnn_f32_gemm_minmax_ukernel_5x8s4__wasmsimd_arm()
165 vacc3x4567 = wasm_f32x4_add(vacc3x4567, wasm_f32x4_mul(va3, vb4567c3)); in xnn_f32_gemm_minmax_ukernel_5x8s4__wasmsimd_arm()
166 vacc4x4567 = wasm_f32x4_add(vacc4x4567, wasm_f32x4_mul(va4, vb4567c3)); in xnn_f32_gemm_minmax_ukernel_5x8s4__wasmsimd_arm()
247 const v128_t vb4567c3 = wasm_v128_load(w + 28); in xnn_f32_gemm_minmax_ukernel_5x8s4__wasmsimd_arm() local
254 …4_add(vacc0x4567, wasm_f32x4_mul(wasm_v128_andnot(va0, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3)); in xnn_f32_gemm_minmax_ukernel_5x8s4__wasmsimd_arm()
255 …4_add(vacc1x4567, wasm_f32x4_mul(wasm_v128_andnot(va1, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3)); in xnn_f32_gemm_minmax_ukernel_5x8s4__wasmsimd_arm()
256 …4_add(vacc2x4567, wasm_f32x4_mul(wasm_v128_andnot(va2, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3)); in xnn_f32_gemm_minmax_ukernel_5x8s4__wasmsimd_arm()
[all …]
D5x8s4-relu-wasmsimd.c153 const v128_t vb4567c3 = wasm_v128_load(w + 28); in xnn_f32_gemm_relu_ukernel_5x8s4__wasmsimd() local
160 vacc0x4567 = wasm_f32x4_add(vacc0x4567, wasm_f32x4_mul(va0, vb4567c3)); in xnn_f32_gemm_relu_ukernel_5x8s4__wasmsimd()
161 vacc1x4567 = wasm_f32x4_add(vacc1x4567, wasm_f32x4_mul(va1, vb4567c3)); in xnn_f32_gemm_relu_ukernel_5x8s4__wasmsimd()
162 vacc2x4567 = wasm_f32x4_add(vacc2x4567, wasm_f32x4_mul(va2, vb4567c3)); in xnn_f32_gemm_relu_ukernel_5x8s4__wasmsimd()
163 vacc3x4567 = wasm_f32x4_add(vacc3x4567, wasm_f32x4_mul(va3, vb4567c3)); in xnn_f32_gemm_relu_ukernel_5x8s4__wasmsimd()
164 vacc4x4567 = wasm_f32x4_add(vacc4x4567, wasm_f32x4_mul(va4, vb4567c3)); in xnn_f32_gemm_relu_ukernel_5x8s4__wasmsimd()
245 const v128_t vb4567c3 = wasm_v128_load(w + 28); in xnn_f32_gemm_relu_ukernel_5x8s4__wasmsimd() local
252 …4_add(vacc0x4567, wasm_f32x4_mul(wasm_v128_andnot(va0, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3)); in xnn_f32_gemm_relu_ukernel_5x8s4__wasmsimd()
253 …4_add(vacc1x4567, wasm_f32x4_mul(wasm_v128_andnot(va1, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3)); in xnn_f32_gemm_relu_ukernel_5x8s4__wasmsimd()
254 …4_add(vacc2x4567, wasm_f32x4_mul(wasm_v128_andnot(va2, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3)); in xnn_f32_gemm_relu_ukernel_5x8s4__wasmsimd()
[all …]
/external/XNNPACK/src/f32-gemm/gen-inc/
D5x8s4inc-minmax-wasmsimd-x86.c157 const v128_t vb4567c3 = wasm_v128_load(w + 28); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_x86() local
164 vacc0x4567 = wasm_f32x4_add(vacc0x4567, wasm_f32x4_mul(va0, vb4567c3)); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_x86()
165 vacc1x4567 = wasm_f32x4_add(vacc1x4567, wasm_f32x4_mul(va1, vb4567c3)); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_x86()
166 vacc2x4567 = wasm_f32x4_add(vacc2x4567, wasm_f32x4_mul(va2, vb4567c3)); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_x86()
167 vacc3x4567 = wasm_f32x4_add(vacc3x4567, wasm_f32x4_mul(va3, vb4567c3)); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_x86()
168 vacc4x4567 = wasm_f32x4_add(vacc4x4567, wasm_f32x4_mul(va4, vb4567c3)); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_x86()
249 const v128_t vb4567c3 = wasm_v128_load(w + 28); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_x86() local
256 …4_add(vacc0x4567, wasm_f32x4_mul(wasm_v128_andnot(va0, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3)); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_x86()
257 …4_add(vacc1x4567, wasm_f32x4_mul(wasm_v128_andnot(va1, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3)); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_x86()
258 …4_add(vacc2x4567, wasm_f32x4_mul(wasm_v128_andnot(va2, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3)); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_x86()
[all …]
D5x8s4inc-minmax-wasmrelaxedsimd-fma.c157 const v128_t vb4567c3 = wasm_v128_load(w + 28); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma() local
164 vacc0x4567 = __builtin_wasm_fma_f32x4(vacc0x4567, va0, vb4567c3); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma()
165 vacc1x4567 = __builtin_wasm_fma_f32x4(vacc1x4567, va1, vb4567c3); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma()
166 vacc2x4567 = __builtin_wasm_fma_f32x4(vacc2x4567, va2, vb4567c3); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma()
167 vacc3x4567 = __builtin_wasm_fma_f32x4(vacc3x4567, va3, vb4567c3); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma()
168 vacc4x4567 = __builtin_wasm_fma_f32x4(vacc4x4567, va4, vb4567c3); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma()
249 const v128_t vb4567c3 = wasm_v128_load(w + 28); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma() local
256 …uiltin_wasm_fma_f32x4(vacc0x4567, wasm_v128_andnot(va0, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma()
257 …uiltin_wasm_fma_f32x4(vacc1x4567, wasm_v128_andnot(va1, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma()
258 …uiltin_wasm_fma_f32x4(vacc2x4567, wasm_v128_andnot(va2, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma()
[all …]
D5x8s4inc-minmax-wasmsimd-arm.c157 const v128_t vb4567c3 = wasm_v128_load(w + 28); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_arm() local
164 vacc0x4567 = wasm_f32x4_add(vacc0x4567, wasm_f32x4_mul(va0, vb4567c3)); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_arm()
165 vacc1x4567 = wasm_f32x4_add(vacc1x4567, wasm_f32x4_mul(va1, vb4567c3)); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_arm()
166 vacc2x4567 = wasm_f32x4_add(vacc2x4567, wasm_f32x4_mul(va2, vb4567c3)); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_arm()
167 vacc3x4567 = wasm_f32x4_add(vacc3x4567, wasm_f32x4_mul(va3, vb4567c3)); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_arm()
168 vacc4x4567 = wasm_f32x4_add(vacc4x4567, wasm_f32x4_mul(va4, vb4567c3)); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_arm()
249 const v128_t vb4567c3 = wasm_v128_load(w + 28); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_arm() local
256 …4_add(vacc0x4567, wasm_f32x4_mul(wasm_v128_andnot(va0, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3)); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_arm()
257 …4_add(vacc1x4567, wasm_f32x4_mul(wasm_v128_andnot(va1, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3)); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_arm()
258 …4_add(vacc2x4567, wasm_f32x4_mul(wasm_v128_andnot(va2, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3)); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_arm()
[all …]

12345678910>>...20