Home
last modified time | relevance | path

Searched refs:vb0123c3 (Results 1 – 25 of 486) sorted by relevance

12345678910>>...20

/external/XNNPACK/src/f32-igemm/gen/
D5x8s4-wasmrelaxedsimd-fma.c177 const v128_t vb0123c3 = wasm_v128_load(w + 24); in xnn_f32_igemm_ukernel_5x8s4__wasmrelaxedsimd_fma() local
180 vacc0x0123 = __builtin_wasm_fma_f32x4(vacc0x0123, va0, vb0123c3); in xnn_f32_igemm_ukernel_5x8s4__wasmrelaxedsimd_fma()
181 vacc1x0123 = __builtin_wasm_fma_f32x4(vacc1x0123, va1, vb0123c3); in xnn_f32_igemm_ukernel_5x8s4__wasmrelaxedsimd_fma()
182 vacc2x0123 = __builtin_wasm_fma_f32x4(vacc2x0123, va2, vb0123c3); in xnn_f32_igemm_ukernel_5x8s4__wasmrelaxedsimd_fma()
183 vacc3x0123 = __builtin_wasm_fma_f32x4(vacc3x0123, va3, vb0123c3); in xnn_f32_igemm_ukernel_5x8s4__wasmrelaxedsimd_fma()
184 vacc4x0123 = __builtin_wasm_fma_f32x4(vacc4x0123, va4, vb0123c3); in xnn_f32_igemm_ukernel_5x8s4__wasmrelaxedsimd_fma()
269 const v128_t vb0123c3 = wasm_v128_load(w + 24); in xnn_f32_igemm_ukernel_5x8s4__wasmrelaxedsimd_fma() local
272 …uiltin_wasm_fma_f32x4(vacc0x0123, wasm_v128_andnot(va0, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3); in xnn_f32_igemm_ukernel_5x8s4__wasmrelaxedsimd_fma()
273 …uiltin_wasm_fma_f32x4(vacc1x0123, wasm_v128_andnot(va1, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3); in xnn_f32_igemm_ukernel_5x8s4__wasmrelaxedsimd_fma()
274 …uiltin_wasm_fma_f32x4(vacc2x0123, wasm_v128_andnot(va2, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3); in xnn_f32_igemm_ukernel_5x8s4__wasmrelaxedsimd_fma()
[all …]
D6x8s4-wasmrelaxedsimd-fma.c199 const v128_t vb0123c3 = wasm_v128_load(w + 24); in xnn_f32_igemm_ukernel_6x8s4__wasmrelaxedsimd_fma() local
202 vacc0x0123 = __builtin_wasm_fma_f32x4(vacc0x0123, va0, vb0123c3); in xnn_f32_igemm_ukernel_6x8s4__wasmrelaxedsimd_fma()
203 vacc1x0123 = __builtin_wasm_fma_f32x4(vacc1x0123, va1, vb0123c3); in xnn_f32_igemm_ukernel_6x8s4__wasmrelaxedsimd_fma()
204 vacc2x0123 = __builtin_wasm_fma_f32x4(vacc2x0123, va2, vb0123c3); in xnn_f32_igemm_ukernel_6x8s4__wasmrelaxedsimd_fma()
205 vacc3x0123 = __builtin_wasm_fma_f32x4(vacc3x0123, va3, vb0123c3); in xnn_f32_igemm_ukernel_6x8s4__wasmrelaxedsimd_fma()
206 vacc4x0123 = __builtin_wasm_fma_f32x4(vacc4x0123, va4, vb0123c3); in xnn_f32_igemm_ukernel_6x8s4__wasmrelaxedsimd_fma()
207 vacc5x0123 = __builtin_wasm_fma_f32x4(vacc5x0123, va5, vb0123c3); in xnn_f32_igemm_ukernel_6x8s4__wasmrelaxedsimd_fma()
304 const v128_t vb0123c3 = wasm_v128_load(w + 24); in xnn_f32_igemm_ukernel_6x8s4__wasmrelaxedsimd_fma() local
307 …uiltin_wasm_fma_f32x4(vacc0x0123, wasm_v128_andnot(va0, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3); in xnn_f32_igemm_ukernel_6x8s4__wasmrelaxedsimd_fma()
308 …uiltin_wasm_fma_f32x4(vacc1x0123, wasm_v128_andnot(va1, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3); in xnn_f32_igemm_ukernel_6x8s4__wasmrelaxedsimd_fma()
[all …]
D6x8s4-relu-wasmrelaxedsimd-fma.c199 const v128_t vb0123c3 = wasm_v128_load(w + 24); in xnn_f32_igemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma() local
202 vacc0x0123 = __builtin_wasm_fma_f32x4(vacc0x0123, va0, vb0123c3); in xnn_f32_igemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma()
203 vacc1x0123 = __builtin_wasm_fma_f32x4(vacc1x0123, va1, vb0123c3); in xnn_f32_igemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma()
204 vacc2x0123 = __builtin_wasm_fma_f32x4(vacc2x0123, va2, vb0123c3); in xnn_f32_igemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma()
205 vacc3x0123 = __builtin_wasm_fma_f32x4(vacc3x0123, va3, vb0123c3); in xnn_f32_igemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma()
206 vacc4x0123 = __builtin_wasm_fma_f32x4(vacc4x0123, va4, vb0123c3); in xnn_f32_igemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma()
207 vacc5x0123 = __builtin_wasm_fma_f32x4(vacc5x0123, va5, vb0123c3); in xnn_f32_igemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma()
304 const v128_t vb0123c3 = wasm_v128_load(w + 24); in xnn_f32_igemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma() local
307 …uiltin_wasm_fma_f32x4(vacc0x0123, wasm_v128_andnot(va0, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3); in xnn_f32_igemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma()
308 …uiltin_wasm_fma_f32x4(vacc1x0123, wasm_v128_andnot(va1, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3); in xnn_f32_igemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma()
[all …]
D4x8s4-wasmrelaxedsimd-fma.c155 const v128_t vb0123c3 = wasm_v128_load(w + 24); in xnn_f32_igemm_ukernel_4x8s4__wasmrelaxedsimd_fma() local
158 vacc0x0123 = __builtin_wasm_fma_f32x4(vacc0x0123, va0, vb0123c3); in xnn_f32_igemm_ukernel_4x8s4__wasmrelaxedsimd_fma()
159 vacc1x0123 = __builtin_wasm_fma_f32x4(vacc1x0123, va1, vb0123c3); in xnn_f32_igemm_ukernel_4x8s4__wasmrelaxedsimd_fma()
160 vacc2x0123 = __builtin_wasm_fma_f32x4(vacc2x0123, va2, vb0123c3); in xnn_f32_igemm_ukernel_4x8s4__wasmrelaxedsimd_fma()
161 vacc3x0123 = __builtin_wasm_fma_f32x4(vacc3x0123, va3, vb0123c3); in xnn_f32_igemm_ukernel_4x8s4__wasmrelaxedsimd_fma()
234 const v128_t vb0123c3 = wasm_v128_load(w + 24); in xnn_f32_igemm_ukernel_4x8s4__wasmrelaxedsimd_fma() local
237 …uiltin_wasm_fma_f32x4(vacc0x0123, wasm_v128_andnot(va0, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3); in xnn_f32_igemm_ukernel_4x8s4__wasmrelaxedsimd_fma()
238 …uiltin_wasm_fma_f32x4(vacc1x0123, wasm_v128_andnot(va1, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3); in xnn_f32_igemm_ukernel_4x8s4__wasmrelaxedsimd_fma()
239 …uiltin_wasm_fma_f32x4(vacc2x0123, wasm_v128_andnot(va2, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3); in xnn_f32_igemm_ukernel_4x8s4__wasmrelaxedsimd_fma()
240 …uiltin_wasm_fma_f32x4(vacc3x0123, wasm_v128_andnot(va3, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3); in xnn_f32_igemm_ukernel_4x8s4__wasmrelaxedsimd_fma()
D4x8s4-relu-wasmrelaxedsimd-fma.c155 const v128_t vb0123c3 = wasm_v128_load(w + 24); in xnn_f32_igemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma() local
158 vacc0x0123 = __builtin_wasm_fma_f32x4(vacc0x0123, va0, vb0123c3); in xnn_f32_igemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma()
159 vacc1x0123 = __builtin_wasm_fma_f32x4(vacc1x0123, va1, vb0123c3); in xnn_f32_igemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma()
160 vacc2x0123 = __builtin_wasm_fma_f32x4(vacc2x0123, va2, vb0123c3); in xnn_f32_igemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma()
161 vacc3x0123 = __builtin_wasm_fma_f32x4(vacc3x0123, va3, vb0123c3); in xnn_f32_igemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma()
234 const v128_t vb0123c3 = wasm_v128_load(w + 24); in xnn_f32_igemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma() local
237 …uiltin_wasm_fma_f32x4(vacc0x0123, wasm_v128_andnot(va0, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3); in xnn_f32_igemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma()
238 …uiltin_wasm_fma_f32x4(vacc1x0123, wasm_v128_andnot(va1, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3); in xnn_f32_igemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma()
239 …uiltin_wasm_fma_f32x4(vacc2x0123, wasm_v128_andnot(va2, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3); in xnn_f32_igemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma()
240 …uiltin_wasm_fma_f32x4(vacc3x0123, wasm_v128_andnot(va3, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3); in xnn_f32_igemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma()
D5x8s4-minmax-wasmrelaxedsimd-fma.c179 const v128_t vb0123c3 = wasm_v128_load(w + 24); in xnn_f32_igemm_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma() local
182 vacc0x0123 = __builtin_wasm_fma_f32x4(vacc0x0123, va0, vb0123c3); in xnn_f32_igemm_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma()
183 vacc1x0123 = __builtin_wasm_fma_f32x4(vacc1x0123, va1, vb0123c3); in xnn_f32_igemm_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma()
184 vacc2x0123 = __builtin_wasm_fma_f32x4(vacc2x0123, va2, vb0123c3); in xnn_f32_igemm_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma()
185 vacc3x0123 = __builtin_wasm_fma_f32x4(vacc3x0123, va3, vb0123c3); in xnn_f32_igemm_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma()
186 vacc4x0123 = __builtin_wasm_fma_f32x4(vacc4x0123, va4, vb0123c3); in xnn_f32_igemm_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma()
271 const v128_t vb0123c3 = wasm_v128_load(w + 24); in xnn_f32_igemm_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma() local
274 …uiltin_wasm_fma_f32x4(vacc0x0123, wasm_v128_andnot(va0, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3); in xnn_f32_igemm_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma()
275 …uiltin_wasm_fma_f32x4(vacc1x0123, wasm_v128_andnot(va1, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3); in xnn_f32_igemm_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma()
276 …uiltin_wasm_fma_f32x4(vacc2x0123, wasm_v128_andnot(va2, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3); in xnn_f32_igemm_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma()
[all …]
D5x8s4-relu-wasmsimd.c177 const v128_t vb0123c3 = wasm_v128_load(w + 24); in xnn_f32_igemm_relu_ukernel_5x8s4__wasmsimd() local
180 vacc0x0123 = wasm_f32x4_add(vacc0x0123, wasm_f32x4_mul(va0, vb0123c3)); in xnn_f32_igemm_relu_ukernel_5x8s4__wasmsimd()
181 vacc1x0123 = wasm_f32x4_add(vacc1x0123, wasm_f32x4_mul(va1, vb0123c3)); in xnn_f32_igemm_relu_ukernel_5x8s4__wasmsimd()
182 vacc2x0123 = wasm_f32x4_add(vacc2x0123, wasm_f32x4_mul(va2, vb0123c3)); in xnn_f32_igemm_relu_ukernel_5x8s4__wasmsimd()
183 vacc3x0123 = wasm_f32x4_add(vacc3x0123, wasm_f32x4_mul(va3, vb0123c3)); in xnn_f32_igemm_relu_ukernel_5x8s4__wasmsimd()
184 vacc4x0123 = wasm_f32x4_add(vacc4x0123, wasm_f32x4_mul(va4, vb0123c3)); in xnn_f32_igemm_relu_ukernel_5x8s4__wasmsimd()
269 const v128_t vb0123c3 = wasm_v128_load(w + 24); in xnn_f32_igemm_relu_ukernel_5x8s4__wasmsimd() local
272 …4_add(vacc0x0123, wasm_f32x4_mul(wasm_v128_andnot(va0, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3)); in xnn_f32_igemm_relu_ukernel_5x8s4__wasmsimd()
273 …4_add(vacc1x0123, wasm_f32x4_mul(wasm_v128_andnot(va1, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3)); in xnn_f32_igemm_relu_ukernel_5x8s4__wasmsimd()
274 …4_add(vacc2x0123, wasm_f32x4_mul(wasm_v128_andnot(va2, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3)); in xnn_f32_igemm_relu_ukernel_5x8s4__wasmsimd()
[all …]
D5x8s4-wasmsimd.c177 const v128_t vb0123c3 = wasm_v128_load(w + 24); in xnn_f32_igemm_ukernel_5x8s4__wasmsimd() local
180 vacc0x0123 = wasm_f32x4_add(vacc0x0123, wasm_f32x4_mul(va0, vb0123c3)); in xnn_f32_igemm_ukernel_5x8s4__wasmsimd()
181 vacc1x0123 = wasm_f32x4_add(vacc1x0123, wasm_f32x4_mul(va1, vb0123c3)); in xnn_f32_igemm_ukernel_5x8s4__wasmsimd()
182 vacc2x0123 = wasm_f32x4_add(vacc2x0123, wasm_f32x4_mul(va2, vb0123c3)); in xnn_f32_igemm_ukernel_5x8s4__wasmsimd()
183 vacc3x0123 = wasm_f32x4_add(vacc3x0123, wasm_f32x4_mul(va3, vb0123c3)); in xnn_f32_igemm_ukernel_5x8s4__wasmsimd()
184 vacc4x0123 = wasm_f32x4_add(vacc4x0123, wasm_f32x4_mul(va4, vb0123c3)); in xnn_f32_igemm_ukernel_5x8s4__wasmsimd()
269 const v128_t vb0123c3 = wasm_v128_load(w + 24); in xnn_f32_igemm_ukernel_5x8s4__wasmsimd() local
272 …4_add(vacc0x0123, wasm_f32x4_mul(wasm_v128_andnot(va0, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3)); in xnn_f32_igemm_ukernel_5x8s4__wasmsimd()
273 …4_add(vacc1x0123, wasm_f32x4_mul(wasm_v128_andnot(va1, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3)); in xnn_f32_igemm_ukernel_5x8s4__wasmsimd()
274 …4_add(vacc2x0123, wasm_f32x4_mul(wasm_v128_andnot(va2, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3)); in xnn_f32_igemm_ukernel_5x8s4__wasmsimd()
[all …]
D5x8s4-relu-wasmrelaxedsimd-fma.c177 const v128_t vb0123c3 = wasm_v128_load(w + 24); in xnn_f32_igemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma() local
180 vacc0x0123 = __builtin_wasm_fma_f32x4(vacc0x0123, va0, vb0123c3); in xnn_f32_igemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma()
181 vacc1x0123 = __builtin_wasm_fma_f32x4(vacc1x0123, va1, vb0123c3); in xnn_f32_igemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma()
182 vacc2x0123 = __builtin_wasm_fma_f32x4(vacc2x0123, va2, vb0123c3); in xnn_f32_igemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma()
183 vacc3x0123 = __builtin_wasm_fma_f32x4(vacc3x0123, va3, vb0123c3); in xnn_f32_igemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma()
184 vacc4x0123 = __builtin_wasm_fma_f32x4(vacc4x0123, va4, vb0123c3); in xnn_f32_igemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma()
269 const v128_t vb0123c3 = wasm_v128_load(w + 24); in xnn_f32_igemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma() local
272 …uiltin_wasm_fma_f32x4(vacc0x0123, wasm_v128_andnot(va0, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3); in xnn_f32_igemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma()
273 …uiltin_wasm_fma_f32x4(vacc1x0123, wasm_v128_andnot(va1, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3); in xnn_f32_igemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma()
274 …uiltin_wasm_fma_f32x4(vacc2x0123, wasm_v128_andnot(va2, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3); in xnn_f32_igemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma()
[all …]
/external/XNNPACK/src/f32-gemm/gen/
D5x8s4-wasmrelaxedsimd-fma.c152 const v128_t vb0123c3 = wasm_v128_load(w + 24); in xnn_f32_gemm_ukernel_5x8s4__wasmrelaxedsimd_fma() local
155 vacc0x0123 = __builtin_wasm_fma_f32x4(vacc0x0123, va0, vb0123c3); in xnn_f32_gemm_ukernel_5x8s4__wasmrelaxedsimd_fma()
156 vacc1x0123 = __builtin_wasm_fma_f32x4(vacc1x0123, va1, vb0123c3); in xnn_f32_gemm_ukernel_5x8s4__wasmrelaxedsimd_fma()
157 vacc2x0123 = __builtin_wasm_fma_f32x4(vacc2x0123, va2, vb0123c3); in xnn_f32_gemm_ukernel_5x8s4__wasmrelaxedsimd_fma()
158 vacc3x0123 = __builtin_wasm_fma_f32x4(vacc3x0123, va3, vb0123c3); in xnn_f32_gemm_ukernel_5x8s4__wasmrelaxedsimd_fma()
159 vacc4x0123 = __builtin_wasm_fma_f32x4(vacc4x0123, va4, vb0123c3); in xnn_f32_gemm_ukernel_5x8s4__wasmrelaxedsimd_fma()
244 const v128_t vb0123c3 = wasm_v128_load(w + 24); in xnn_f32_gemm_ukernel_5x8s4__wasmrelaxedsimd_fma() local
247 …uiltin_wasm_fma_f32x4(vacc0x0123, wasm_v128_andnot(va0, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3); in xnn_f32_gemm_ukernel_5x8s4__wasmrelaxedsimd_fma()
248 …uiltin_wasm_fma_f32x4(vacc1x0123, wasm_v128_andnot(va1, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3); in xnn_f32_gemm_ukernel_5x8s4__wasmrelaxedsimd_fma()
249 …uiltin_wasm_fma_f32x4(vacc2x0123, wasm_v128_andnot(va2, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3); in xnn_f32_gemm_ukernel_5x8s4__wasmrelaxedsimd_fma()
[all …]
D5x8s4-relu-wasmrelaxedsimd-fma.c152 const v128_t vb0123c3 = wasm_v128_load(w + 24); in xnn_f32_gemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma() local
155 vacc0x0123 = __builtin_wasm_fma_f32x4(vacc0x0123, va0, vb0123c3); in xnn_f32_gemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma()
156 vacc1x0123 = __builtin_wasm_fma_f32x4(vacc1x0123, va1, vb0123c3); in xnn_f32_gemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma()
157 vacc2x0123 = __builtin_wasm_fma_f32x4(vacc2x0123, va2, vb0123c3); in xnn_f32_gemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma()
158 vacc3x0123 = __builtin_wasm_fma_f32x4(vacc3x0123, va3, vb0123c3); in xnn_f32_gemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma()
159 vacc4x0123 = __builtin_wasm_fma_f32x4(vacc4x0123, va4, vb0123c3); in xnn_f32_gemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma()
244 const v128_t vb0123c3 = wasm_v128_load(w + 24); in xnn_f32_gemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma() local
247 …uiltin_wasm_fma_f32x4(vacc0x0123, wasm_v128_andnot(va0, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3); in xnn_f32_gemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma()
248 …uiltin_wasm_fma_f32x4(vacc1x0123, wasm_v128_andnot(va1, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3); in xnn_f32_gemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma()
249 …uiltin_wasm_fma_f32x4(vacc2x0123, wasm_v128_andnot(va2, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3); in xnn_f32_gemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma()
[all …]
D6x8s4-relu-wasmrelaxedsimd-fma.c171 const v128_t vb0123c3 = wasm_v128_load(w + 24); in xnn_f32_gemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma() local
174 vacc0x0123 = __builtin_wasm_fma_f32x4(vacc0x0123, va0, vb0123c3); in xnn_f32_gemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma()
175 vacc1x0123 = __builtin_wasm_fma_f32x4(vacc1x0123, va1, vb0123c3); in xnn_f32_gemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma()
176 vacc2x0123 = __builtin_wasm_fma_f32x4(vacc2x0123, va2, vb0123c3); in xnn_f32_gemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma()
177 vacc3x0123 = __builtin_wasm_fma_f32x4(vacc3x0123, va3, vb0123c3); in xnn_f32_gemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma()
178 vacc4x0123 = __builtin_wasm_fma_f32x4(vacc4x0123, va4, vb0123c3); in xnn_f32_gemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma()
179 vacc5x0123 = __builtin_wasm_fma_f32x4(vacc5x0123, va5, vb0123c3); in xnn_f32_gemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma()
276 const v128_t vb0123c3 = wasm_v128_load(w + 24); in xnn_f32_gemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma() local
279 …uiltin_wasm_fma_f32x4(vacc0x0123, wasm_v128_andnot(va0, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3); in xnn_f32_gemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma()
280 …uiltin_wasm_fma_f32x4(vacc1x0123, wasm_v128_andnot(va1, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3); in xnn_f32_gemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma()
[all …]
D6x8s4-wasmsimd.c171 const v128_t vb0123c3 = wasm_v128_load(w + 24); in xnn_f32_gemm_ukernel_6x8s4__wasmsimd() local
174 vacc0x0123 = wasm_f32x4_add(vacc0x0123, wasm_f32x4_mul(va0, vb0123c3)); in xnn_f32_gemm_ukernel_6x8s4__wasmsimd()
175 vacc1x0123 = wasm_f32x4_add(vacc1x0123, wasm_f32x4_mul(va1, vb0123c3)); in xnn_f32_gemm_ukernel_6x8s4__wasmsimd()
176 vacc2x0123 = wasm_f32x4_add(vacc2x0123, wasm_f32x4_mul(va2, vb0123c3)); in xnn_f32_gemm_ukernel_6x8s4__wasmsimd()
177 vacc3x0123 = wasm_f32x4_add(vacc3x0123, wasm_f32x4_mul(va3, vb0123c3)); in xnn_f32_gemm_ukernel_6x8s4__wasmsimd()
178 vacc4x0123 = wasm_f32x4_add(vacc4x0123, wasm_f32x4_mul(va4, vb0123c3)); in xnn_f32_gemm_ukernel_6x8s4__wasmsimd()
179 vacc5x0123 = wasm_f32x4_add(vacc5x0123, wasm_f32x4_mul(va5, vb0123c3)); in xnn_f32_gemm_ukernel_6x8s4__wasmsimd()
276 const v128_t vb0123c3 = wasm_v128_load(w + 24); in xnn_f32_gemm_ukernel_6x8s4__wasmsimd() local
279 …4_add(vacc0x0123, wasm_f32x4_mul(wasm_v128_andnot(va0, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3)); in xnn_f32_gemm_ukernel_6x8s4__wasmsimd()
280 …4_add(vacc1x0123, wasm_f32x4_mul(wasm_v128_andnot(va1, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3)); in xnn_f32_gemm_ukernel_6x8s4__wasmsimd()
[all …]
D6x8s4-wasmrelaxedsimd-fma.c171 const v128_t vb0123c3 = wasm_v128_load(w + 24); in xnn_f32_gemm_ukernel_6x8s4__wasmrelaxedsimd_fma() local
174 vacc0x0123 = __builtin_wasm_fma_f32x4(vacc0x0123, va0, vb0123c3); in xnn_f32_gemm_ukernel_6x8s4__wasmrelaxedsimd_fma()
175 vacc1x0123 = __builtin_wasm_fma_f32x4(vacc1x0123, va1, vb0123c3); in xnn_f32_gemm_ukernel_6x8s4__wasmrelaxedsimd_fma()
176 vacc2x0123 = __builtin_wasm_fma_f32x4(vacc2x0123, va2, vb0123c3); in xnn_f32_gemm_ukernel_6x8s4__wasmrelaxedsimd_fma()
177 vacc3x0123 = __builtin_wasm_fma_f32x4(vacc3x0123, va3, vb0123c3); in xnn_f32_gemm_ukernel_6x8s4__wasmrelaxedsimd_fma()
178 vacc4x0123 = __builtin_wasm_fma_f32x4(vacc4x0123, va4, vb0123c3); in xnn_f32_gemm_ukernel_6x8s4__wasmrelaxedsimd_fma()
179 vacc5x0123 = __builtin_wasm_fma_f32x4(vacc5x0123, va5, vb0123c3); in xnn_f32_gemm_ukernel_6x8s4__wasmrelaxedsimd_fma()
276 const v128_t vb0123c3 = wasm_v128_load(w + 24); in xnn_f32_gemm_ukernel_6x8s4__wasmrelaxedsimd_fma() local
279 …uiltin_wasm_fma_f32x4(vacc0x0123, wasm_v128_andnot(va0, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3); in xnn_f32_gemm_ukernel_6x8s4__wasmrelaxedsimd_fma()
280 …uiltin_wasm_fma_f32x4(vacc1x0123, wasm_v128_andnot(va1, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3); in xnn_f32_gemm_ukernel_6x8s4__wasmrelaxedsimd_fma()
[all …]
D6x8s4-minmax-wasmrelaxedsimd-fma.c173 const v128_t vb0123c3 = wasm_v128_load(w + 24); in xnn_f32_gemm_minmax_ukernel_6x8s4__wasmrelaxedsimd_fma() local
176 vacc0x0123 = __builtin_wasm_fma_f32x4(vacc0x0123, va0, vb0123c3); in xnn_f32_gemm_minmax_ukernel_6x8s4__wasmrelaxedsimd_fma()
177 vacc1x0123 = __builtin_wasm_fma_f32x4(vacc1x0123, va1, vb0123c3); in xnn_f32_gemm_minmax_ukernel_6x8s4__wasmrelaxedsimd_fma()
178 vacc2x0123 = __builtin_wasm_fma_f32x4(vacc2x0123, va2, vb0123c3); in xnn_f32_gemm_minmax_ukernel_6x8s4__wasmrelaxedsimd_fma()
179 vacc3x0123 = __builtin_wasm_fma_f32x4(vacc3x0123, va3, vb0123c3); in xnn_f32_gemm_minmax_ukernel_6x8s4__wasmrelaxedsimd_fma()
180 vacc4x0123 = __builtin_wasm_fma_f32x4(vacc4x0123, va4, vb0123c3); in xnn_f32_gemm_minmax_ukernel_6x8s4__wasmrelaxedsimd_fma()
181 vacc5x0123 = __builtin_wasm_fma_f32x4(vacc5x0123, va5, vb0123c3); in xnn_f32_gemm_minmax_ukernel_6x8s4__wasmrelaxedsimd_fma()
278 const v128_t vb0123c3 = wasm_v128_load(w + 24); in xnn_f32_gemm_minmax_ukernel_6x8s4__wasmrelaxedsimd_fma() local
281 …uiltin_wasm_fma_f32x4(vacc0x0123, wasm_v128_andnot(va0, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3); in xnn_f32_gemm_minmax_ukernel_6x8s4__wasmrelaxedsimd_fma()
282 …uiltin_wasm_fma_f32x4(vacc1x0123, wasm_v128_andnot(va1, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3); in xnn_f32_gemm_minmax_ukernel_6x8s4__wasmrelaxedsimd_fma()
[all …]
D4x8s4-wasmrelaxedsimd-fma.c133 const v128_t vb0123c3 = wasm_v128_load(w + 24); in xnn_f32_gemm_ukernel_4x8s4__wasmrelaxedsimd_fma() local
136 vacc0x0123 = __builtin_wasm_fma_f32x4(vacc0x0123, va0, vb0123c3); in xnn_f32_gemm_ukernel_4x8s4__wasmrelaxedsimd_fma()
137 vacc1x0123 = __builtin_wasm_fma_f32x4(vacc1x0123, va1, vb0123c3); in xnn_f32_gemm_ukernel_4x8s4__wasmrelaxedsimd_fma()
138 vacc2x0123 = __builtin_wasm_fma_f32x4(vacc2x0123, va2, vb0123c3); in xnn_f32_gemm_ukernel_4x8s4__wasmrelaxedsimd_fma()
139 vacc3x0123 = __builtin_wasm_fma_f32x4(vacc3x0123, va3, vb0123c3); in xnn_f32_gemm_ukernel_4x8s4__wasmrelaxedsimd_fma()
212 const v128_t vb0123c3 = wasm_v128_load(w + 24); in xnn_f32_gemm_ukernel_4x8s4__wasmrelaxedsimd_fma() local
215 …uiltin_wasm_fma_f32x4(vacc0x0123, wasm_v128_andnot(va0, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3); in xnn_f32_gemm_ukernel_4x8s4__wasmrelaxedsimd_fma()
216 …uiltin_wasm_fma_f32x4(vacc1x0123, wasm_v128_andnot(va1, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3); in xnn_f32_gemm_ukernel_4x8s4__wasmrelaxedsimd_fma()
217 …uiltin_wasm_fma_f32x4(vacc2x0123, wasm_v128_andnot(va2, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3); in xnn_f32_gemm_ukernel_4x8s4__wasmrelaxedsimd_fma()
218 …uiltin_wasm_fma_f32x4(vacc3x0123, wasm_v128_andnot(va3, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3); in xnn_f32_gemm_ukernel_4x8s4__wasmrelaxedsimd_fma()
D3x8s4-wasmrelaxedsimd-fma.c114 const v128_t vb0123c3 = wasm_v128_load(w + 24); in xnn_f32_gemm_ukernel_3x8s4__wasmrelaxedsimd_fma() local
117 vacc0x0123 = __builtin_wasm_fma_f32x4(vacc0x0123, va0, vb0123c3); in xnn_f32_gemm_ukernel_3x8s4__wasmrelaxedsimd_fma()
118 vacc1x0123 = __builtin_wasm_fma_f32x4(vacc1x0123, va1, vb0123c3); in xnn_f32_gemm_ukernel_3x8s4__wasmrelaxedsimd_fma()
119 vacc2x0123 = __builtin_wasm_fma_f32x4(vacc2x0123, va2, vb0123c3); in xnn_f32_gemm_ukernel_3x8s4__wasmrelaxedsimd_fma()
180 const v128_t vb0123c3 = wasm_v128_load(w + 24); in xnn_f32_gemm_ukernel_3x8s4__wasmrelaxedsimd_fma() local
183 …uiltin_wasm_fma_f32x4(vacc0x0123, wasm_v128_andnot(va0, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3); in xnn_f32_gemm_ukernel_3x8s4__wasmrelaxedsimd_fma()
184 …uiltin_wasm_fma_f32x4(vacc1x0123, wasm_v128_andnot(va1, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3); in xnn_f32_gemm_ukernel_3x8s4__wasmrelaxedsimd_fma()
185 …uiltin_wasm_fma_f32x4(vacc2x0123, wasm_v128_andnot(va2, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3); in xnn_f32_gemm_ukernel_3x8s4__wasmrelaxedsimd_fma()
D4x8s4-wasmsimd.c133 const v128_t vb0123c3 = wasm_v128_load(w + 24); in xnn_f32_gemm_ukernel_4x8s4__wasmsimd() local
136 vacc0x0123 = wasm_f32x4_add(vacc0x0123, wasm_f32x4_mul(va0, vb0123c3)); in xnn_f32_gemm_ukernel_4x8s4__wasmsimd()
137 vacc1x0123 = wasm_f32x4_add(vacc1x0123, wasm_f32x4_mul(va1, vb0123c3)); in xnn_f32_gemm_ukernel_4x8s4__wasmsimd()
138 vacc2x0123 = wasm_f32x4_add(vacc2x0123, wasm_f32x4_mul(va2, vb0123c3)); in xnn_f32_gemm_ukernel_4x8s4__wasmsimd()
139 vacc3x0123 = wasm_f32x4_add(vacc3x0123, wasm_f32x4_mul(va3, vb0123c3)); in xnn_f32_gemm_ukernel_4x8s4__wasmsimd()
212 const v128_t vb0123c3 = wasm_v128_load(w + 24); in xnn_f32_gemm_ukernel_4x8s4__wasmsimd() local
215 …4_add(vacc0x0123, wasm_f32x4_mul(wasm_v128_andnot(va0, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3)); in xnn_f32_gemm_ukernel_4x8s4__wasmsimd()
216 …4_add(vacc1x0123, wasm_f32x4_mul(wasm_v128_andnot(va1, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3)); in xnn_f32_gemm_ukernel_4x8s4__wasmsimd()
217 …4_add(vacc2x0123, wasm_f32x4_mul(wasm_v128_andnot(va2, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3)); in xnn_f32_gemm_ukernel_4x8s4__wasmsimd()
218 …4_add(vacc3x0123, wasm_f32x4_mul(wasm_v128_andnot(va3, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3)); in xnn_f32_gemm_ukernel_4x8s4__wasmsimd()
D4x8s4-relu-wasmrelaxedsimd-fma.c133 const v128_t vb0123c3 = wasm_v128_load(w + 24); in xnn_f32_gemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma() local
136 vacc0x0123 = __builtin_wasm_fma_f32x4(vacc0x0123, va0, vb0123c3); in xnn_f32_gemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma()
137 vacc1x0123 = __builtin_wasm_fma_f32x4(vacc1x0123, va1, vb0123c3); in xnn_f32_gemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma()
138 vacc2x0123 = __builtin_wasm_fma_f32x4(vacc2x0123, va2, vb0123c3); in xnn_f32_gemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma()
139 vacc3x0123 = __builtin_wasm_fma_f32x4(vacc3x0123, va3, vb0123c3); in xnn_f32_gemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma()
212 const v128_t vb0123c3 = wasm_v128_load(w + 24); in xnn_f32_gemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma() local
215 …uiltin_wasm_fma_f32x4(vacc0x0123, wasm_v128_andnot(va0, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3); in xnn_f32_gemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma()
216 …uiltin_wasm_fma_f32x4(vacc1x0123, wasm_v128_andnot(va1, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3); in xnn_f32_gemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma()
217 …uiltin_wasm_fma_f32x4(vacc2x0123, wasm_v128_andnot(va2, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3); in xnn_f32_gemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma()
218 …uiltin_wasm_fma_f32x4(vacc3x0123, wasm_v128_andnot(va3, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3); in xnn_f32_gemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma()
D5x8s4-minmax-wasmrelaxedsimd.c154 const v128_t vb0123c3 = wasm_v128_load(w + 24); in xnn_f32_gemm_minmax_ukernel_5x8s4__wasmrelaxedsimd() local
157 vacc0x0123 = wasm_f32x4_add(vacc0x0123, wasm_f32x4_mul(va0, vb0123c3)); in xnn_f32_gemm_minmax_ukernel_5x8s4__wasmrelaxedsimd()
158 vacc1x0123 = wasm_f32x4_add(vacc1x0123, wasm_f32x4_mul(va1, vb0123c3)); in xnn_f32_gemm_minmax_ukernel_5x8s4__wasmrelaxedsimd()
159 vacc2x0123 = wasm_f32x4_add(vacc2x0123, wasm_f32x4_mul(va2, vb0123c3)); in xnn_f32_gemm_minmax_ukernel_5x8s4__wasmrelaxedsimd()
160 vacc3x0123 = wasm_f32x4_add(vacc3x0123, wasm_f32x4_mul(va3, vb0123c3)); in xnn_f32_gemm_minmax_ukernel_5x8s4__wasmrelaxedsimd()
161 vacc4x0123 = wasm_f32x4_add(vacc4x0123, wasm_f32x4_mul(va4, vb0123c3)); in xnn_f32_gemm_minmax_ukernel_5x8s4__wasmrelaxedsimd()
246 const v128_t vb0123c3 = wasm_v128_load(w + 24); in xnn_f32_gemm_minmax_ukernel_5x8s4__wasmrelaxedsimd() local
249 …4_add(vacc0x0123, wasm_f32x4_mul(wasm_v128_andnot(va0, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3)); in xnn_f32_gemm_minmax_ukernel_5x8s4__wasmrelaxedsimd()
250 …4_add(vacc1x0123, wasm_f32x4_mul(wasm_v128_andnot(va1, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3)); in xnn_f32_gemm_minmax_ukernel_5x8s4__wasmrelaxedsimd()
251 …4_add(vacc2x0123, wasm_f32x4_mul(wasm_v128_andnot(va2, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3)); in xnn_f32_gemm_minmax_ukernel_5x8s4__wasmrelaxedsimd()
[all …]
D5x8s4-minmax-wasmsimd-arm.c154 const v128_t vb0123c3 = wasm_v128_load(w + 24); in xnn_f32_gemm_minmax_ukernel_5x8s4__wasmsimd_arm() local
157 vacc0x0123 = wasm_f32x4_add(vacc0x0123, wasm_f32x4_mul(va0, vb0123c3)); in xnn_f32_gemm_minmax_ukernel_5x8s4__wasmsimd_arm()
158 vacc1x0123 = wasm_f32x4_add(vacc1x0123, wasm_f32x4_mul(va1, vb0123c3)); in xnn_f32_gemm_minmax_ukernel_5x8s4__wasmsimd_arm()
159 vacc2x0123 = wasm_f32x4_add(vacc2x0123, wasm_f32x4_mul(va2, vb0123c3)); in xnn_f32_gemm_minmax_ukernel_5x8s4__wasmsimd_arm()
160 vacc3x0123 = wasm_f32x4_add(vacc3x0123, wasm_f32x4_mul(va3, vb0123c3)); in xnn_f32_gemm_minmax_ukernel_5x8s4__wasmsimd_arm()
161 vacc4x0123 = wasm_f32x4_add(vacc4x0123, wasm_f32x4_mul(va4, vb0123c3)); in xnn_f32_gemm_minmax_ukernel_5x8s4__wasmsimd_arm()
246 const v128_t vb0123c3 = wasm_v128_load(w + 24); in xnn_f32_gemm_minmax_ukernel_5x8s4__wasmsimd_arm() local
249 …4_add(vacc0x0123, wasm_f32x4_mul(wasm_v128_andnot(va0, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3)); in xnn_f32_gemm_minmax_ukernel_5x8s4__wasmsimd_arm()
250 …4_add(vacc1x0123, wasm_f32x4_mul(wasm_v128_andnot(va1, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3)); in xnn_f32_gemm_minmax_ukernel_5x8s4__wasmsimd_arm()
251 …4_add(vacc2x0123, wasm_f32x4_mul(wasm_v128_andnot(va2, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3)); in xnn_f32_gemm_minmax_ukernel_5x8s4__wasmsimd_arm()
[all …]
D5x8s4-relu-wasmsimd.c152 const v128_t vb0123c3 = wasm_v128_load(w + 24); in xnn_f32_gemm_relu_ukernel_5x8s4__wasmsimd() local
155 vacc0x0123 = wasm_f32x4_add(vacc0x0123, wasm_f32x4_mul(va0, vb0123c3)); in xnn_f32_gemm_relu_ukernel_5x8s4__wasmsimd()
156 vacc1x0123 = wasm_f32x4_add(vacc1x0123, wasm_f32x4_mul(va1, vb0123c3)); in xnn_f32_gemm_relu_ukernel_5x8s4__wasmsimd()
157 vacc2x0123 = wasm_f32x4_add(vacc2x0123, wasm_f32x4_mul(va2, vb0123c3)); in xnn_f32_gemm_relu_ukernel_5x8s4__wasmsimd()
158 vacc3x0123 = wasm_f32x4_add(vacc3x0123, wasm_f32x4_mul(va3, vb0123c3)); in xnn_f32_gemm_relu_ukernel_5x8s4__wasmsimd()
159 vacc4x0123 = wasm_f32x4_add(vacc4x0123, wasm_f32x4_mul(va4, vb0123c3)); in xnn_f32_gemm_relu_ukernel_5x8s4__wasmsimd()
244 const v128_t vb0123c3 = wasm_v128_load(w + 24); in xnn_f32_gemm_relu_ukernel_5x8s4__wasmsimd() local
247 …4_add(vacc0x0123, wasm_f32x4_mul(wasm_v128_andnot(va0, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3)); in xnn_f32_gemm_relu_ukernel_5x8s4__wasmsimd()
248 …4_add(vacc1x0123, wasm_f32x4_mul(wasm_v128_andnot(va1, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3)); in xnn_f32_gemm_relu_ukernel_5x8s4__wasmsimd()
249 …4_add(vacc2x0123, wasm_f32x4_mul(wasm_v128_andnot(va2, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3)); in xnn_f32_gemm_relu_ukernel_5x8s4__wasmsimd()
[all …]
/external/XNNPACK/src/f32-gemm/gen-inc/
D5x8s4inc-minmax-wasmsimd-x86.c156 const v128_t vb0123c3 = wasm_v128_load(w + 24); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_x86() local
159 vacc0x0123 = wasm_f32x4_add(vacc0x0123, wasm_f32x4_mul(va0, vb0123c3)); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_x86()
160 vacc1x0123 = wasm_f32x4_add(vacc1x0123, wasm_f32x4_mul(va1, vb0123c3)); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_x86()
161 vacc2x0123 = wasm_f32x4_add(vacc2x0123, wasm_f32x4_mul(va2, vb0123c3)); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_x86()
162 vacc3x0123 = wasm_f32x4_add(vacc3x0123, wasm_f32x4_mul(va3, vb0123c3)); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_x86()
163 vacc4x0123 = wasm_f32x4_add(vacc4x0123, wasm_f32x4_mul(va4, vb0123c3)); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_x86()
248 const v128_t vb0123c3 = wasm_v128_load(w + 24); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_x86() local
251 …4_add(vacc0x0123, wasm_f32x4_mul(wasm_v128_andnot(va0, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3)); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_x86()
252 …4_add(vacc1x0123, wasm_f32x4_mul(wasm_v128_andnot(va1, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3)); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_x86()
253 …4_add(vacc2x0123, wasm_f32x4_mul(wasm_v128_andnot(va2, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3)); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_x86()
[all …]
D5x8s4inc-minmax-wasmrelaxedsimd-fma.c156 const v128_t vb0123c3 = wasm_v128_load(w + 24); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma() local
159 vacc0x0123 = __builtin_wasm_fma_f32x4(vacc0x0123, va0, vb0123c3); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma()
160 vacc1x0123 = __builtin_wasm_fma_f32x4(vacc1x0123, va1, vb0123c3); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma()
161 vacc2x0123 = __builtin_wasm_fma_f32x4(vacc2x0123, va2, vb0123c3); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma()
162 vacc3x0123 = __builtin_wasm_fma_f32x4(vacc3x0123, va3, vb0123c3); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma()
163 vacc4x0123 = __builtin_wasm_fma_f32x4(vacc4x0123, va4, vb0123c3); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma()
248 const v128_t vb0123c3 = wasm_v128_load(w + 24); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma() local
251 …uiltin_wasm_fma_f32x4(vacc0x0123, wasm_v128_andnot(va0, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma()
252 …uiltin_wasm_fma_f32x4(vacc1x0123, wasm_v128_andnot(va1, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma()
253 …uiltin_wasm_fma_f32x4(vacc2x0123, wasm_v128_andnot(va2, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma()
[all …]
D5x8s4inc-minmax-wasmsimd-arm.c156 const v128_t vb0123c3 = wasm_v128_load(w + 24); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_arm() local
159 vacc0x0123 = wasm_f32x4_add(vacc0x0123, wasm_f32x4_mul(va0, vb0123c3)); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_arm()
160 vacc1x0123 = wasm_f32x4_add(vacc1x0123, wasm_f32x4_mul(va1, vb0123c3)); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_arm()
161 vacc2x0123 = wasm_f32x4_add(vacc2x0123, wasm_f32x4_mul(va2, vb0123c3)); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_arm()
162 vacc3x0123 = wasm_f32x4_add(vacc3x0123, wasm_f32x4_mul(va3, vb0123c3)); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_arm()
163 vacc4x0123 = wasm_f32x4_add(vacc4x0123, wasm_f32x4_mul(va4, vb0123c3)); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_arm()
248 const v128_t vb0123c3 = wasm_v128_load(w + 24); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_arm() local
251 …4_add(vacc0x0123, wasm_f32x4_mul(wasm_v128_andnot(va0, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3)); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_arm()
252 …4_add(vacc1x0123, wasm_f32x4_mul(wasm_v128_andnot(va1, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3)); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_arm()
253 …4_add(vacc2x0123, wasm_f32x4_mul(wasm_v128_andnot(va2, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3)); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_arm()
[all …]

12345678910>>...20