/external/XNNPACK/src/f32-igemm/gen/ |
D | 5x8s4-wasmrelaxedsimd-fma.c | 178 const v128_t vb4567c3 = wasm_v128_load(w + 28); in xnn_f32_igemm_ukernel_5x8s4__wasmrelaxedsimd_fma() local 185 vacc0x4567 = __builtin_wasm_fma_f32x4(vacc0x4567, va0, vb4567c3); in xnn_f32_igemm_ukernel_5x8s4__wasmrelaxedsimd_fma() 186 vacc1x4567 = __builtin_wasm_fma_f32x4(vacc1x4567, va1, vb4567c3); in xnn_f32_igemm_ukernel_5x8s4__wasmrelaxedsimd_fma() 187 vacc2x4567 = __builtin_wasm_fma_f32x4(vacc2x4567, va2, vb4567c3); in xnn_f32_igemm_ukernel_5x8s4__wasmrelaxedsimd_fma() 188 vacc3x4567 = __builtin_wasm_fma_f32x4(vacc3x4567, va3, vb4567c3); in xnn_f32_igemm_ukernel_5x8s4__wasmrelaxedsimd_fma() 189 vacc4x4567 = __builtin_wasm_fma_f32x4(vacc4x4567, va4, vb4567c3); in xnn_f32_igemm_ukernel_5x8s4__wasmrelaxedsimd_fma() 270 const v128_t vb4567c3 = wasm_v128_load(w + 28); in xnn_f32_igemm_ukernel_5x8s4__wasmrelaxedsimd_fma() local 277 …uiltin_wasm_fma_f32x4(vacc0x4567, wasm_v128_andnot(va0, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3); in xnn_f32_igemm_ukernel_5x8s4__wasmrelaxedsimd_fma() 278 …uiltin_wasm_fma_f32x4(vacc1x4567, wasm_v128_andnot(va1, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3); in xnn_f32_igemm_ukernel_5x8s4__wasmrelaxedsimd_fma() 279 …uiltin_wasm_fma_f32x4(vacc2x4567, wasm_v128_andnot(va2, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3); in xnn_f32_igemm_ukernel_5x8s4__wasmrelaxedsimd_fma() [all …]
|
D | 6x8s4-wasmrelaxedsimd-fma.c | 200 const v128_t vb4567c3 = wasm_v128_load(w + 28); in xnn_f32_igemm_ukernel_6x8s4__wasmrelaxedsimd_fma() local 208 vacc0x4567 = __builtin_wasm_fma_f32x4(vacc0x4567, va0, vb4567c3); in xnn_f32_igemm_ukernel_6x8s4__wasmrelaxedsimd_fma() 209 vacc1x4567 = __builtin_wasm_fma_f32x4(vacc1x4567, va1, vb4567c3); in xnn_f32_igemm_ukernel_6x8s4__wasmrelaxedsimd_fma() 210 vacc2x4567 = __builtin_wasm_fma_f32x4(vacc2x4567, va2, vb4567c3); in xnn_f32_igemm_ukernel_6x8s4__wasmrelaxedsimd_fma() 211 vacc3x4567 = __builtin_wasm_fma_f32x4(vacc3x4567, va3, vb4567c3); in xnn_f32_igemm_ukernel_6x8s4__wasmrelaxedsimd_fma() 212 vacc4x4567 = __builtin_wasm_fma_f32x4(vacc4x4567, va4, vb4567c3); in xnn_f32_igemm_ukernel_6x8s4__wasmrelaxedsimd_fma() 213 vacc5x4567 = __builtin_wasm_fma_f32x4(vacc5x4567, va5, vb4567c3); in xnn_f32_igemm_ukernel_6x8s4__wasmrelaxedsimd_fma() 305 const v128_t vb4567c3 = wasm_v128_load(w + 28); in xnn_f32_igemm_ukernel_6x8s4__wasmrelaxedsimd_fma() local 313 …uiltin_wasm_fma_f32x4(vacc0x4567, wasm_v128_andnot(va0, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3); in xnn_f32_igemm_ukernel_6x8s4__wasmrelaxedsimd_fma() 314 …uiltin_wasm_fma_f32x4(vacc1x4567, wasm_v128_andnot(va1, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3); in xnn_f32_igemm_ukernel_6x8s4__wasmrelaxedsimd_fma() [all …]
|
D | 6x8s4-relu-wasmrelaxedsimd-fma.c | 200 const v128_t vb4567c3 = wasm_v128_load(w + 28); in xnn_f32_igemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma() local 208 vacc0x4567 = __builtin_wasm_fma_f32x4(vacc0x4567, va0, vb4567c3); in xnn_f32_igemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma() 209 vacc1x4567 = __builtin_wasm_fma_f32x4(vacc1x4567, va1, vb4567c3); in xnn_f32_igemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma() 210 vacc2x4567 = __builtin_wasm_fma_f32x4(vacc2x4567, va2, vb4567c3); in xnn_f32_igemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma() 211 vacc3x4567 = __builtin_wasm_fma_f32x4(vacc3x4567, va3, vb4567c3); in xnn_f32_igemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma() 212 vacc4x4567 = __builtin_wasm_fma_f32x4(vacc4x4567, va4, vb4567c3); in xnn_f32_igemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma() 213 vacc5x4567 = __builtin_wasm_fma_f32x4(vacc5x4567, va5, vb4567c3); in xnn_f32_igemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma() 305 const v128_t vb4567c3 = wasm_v128_load(w + 28); in xnn_f32_igemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma() local 313 …uiltin_wasm_fma_f32x4(vacc0x4567, wasm_v128_andnot(va0, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3); in xnn_f32_igemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma() 314 …uiltin_wasm_fma_f32x4(vacc1x4567, wasm_v128_andnot(va1, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3); in xnn_f32_igemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma() [all …]
|
D | 4x8s4-wasmrelaxedsimd-fma.c | 156 const v128_t vb4567c3 = wasm_v128_load(w + 28); in xnn_f32_igemm_ukernel_4x8s4__wasmrelaxedsimd_fma() local 162 vacc0x4567 = __builtin_wasm_fma_f32x4(vacc0x4567, va0, vb4567c3); in xnn_f32_igemm_ukernel_4x8s4__wasmrelaxedsimd_fma() 163 vacc1x4567 = __builtin_wasm_fma_f32x4(vacc1x4567, va1, vb4567c3); in xnn_f32_igemm_ukernel_4x8s4__wasmrelaxedsimd_fma() 164 vacc2x4567 = __builtin_wasm_fma_f32x4(vacc2x4567, va2, vb4567c3); in xnn_f32_igemm_ukernel_4x8s4__wasmrelaxedsimd_fma() 165 vacc3x4567 = __builtin_wasm_fma_f32x4(vacc3x4567, va3, vb4567c3); in xnn_f32_igemm_ukernel_4x8s4__wasmrelaxedsimd_fma() 235 const v128_t vb4567c3 = wasm_v128_load(w + 28); in xnn_f32_igemm_ukernel_4x8s4__wasmrelaxedsimd_fma() local 241 …uiltin_wasm_fma_f32x4(vacc0x4567, wasm_v128_andnot(va0, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3); in xnn_f32_igemm_ukernel_4x8s4__wasmrelaxedsimd_fma() 242 …uiltin_wasm_fma_f32x4(vacc1x4567, wasm_v128_andnot(va1, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3); in xnn_f32_igemm_ukernel_4x8s4__wasmrelaxedsimd_fma() 243 …uiltin_wasm_fma_f32x4(vacc2x4567, wasm_v128_andnot(va2, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3); in xnn_f32_igemm_ukernel_4x8s4__wasmrelaxedsimd_fma() 244 …uiltin_wasm_fma_f32x4(vacc3x4567, wasm_v128_andnot(va3, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3); in xnn_f32_igemm_ukernel_4x8s4__wasmrelaxedsimd_fma()
|
D | 4x8s4-relu-wasmrelaxedsimd-fma.c | 156 const v128_t vb4567c3 = wasm_v128_load(w + 28); in xnn_f32_igemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma() local 162 vacc0x4567 = __builtin_wasm_fma_f32x4(vacc0x4567, va0, vb4567c3); in xnn_f32_igemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma() 163 vacc1x4567 = __builtin_wasm_fma_f32x4(vacc1x4567, va1, vb4567c3); in xnn_f32_igemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma() 164 vacc2x4567 = __builtin_wasm_fma_f32x4(vacc2x4567, va2, vb4567c3); in xnn_f32_igemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma() 165 vacc3x4567 = __builtin_wasm_fma_f32x4(vacc3x4567, va3, vb4567c3); in xnn_f32_igemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma() 235 const v128_t vb4567c3 = wasm_v128_load(w + 28); in xnn_f32_igemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma() local 241 …uiltin_wasm_fma_f32x4(vacc0x4567, wasm_v128_andnot(va0, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3); in xnn_f32_igemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma() 242 …uiltin_wasm_fma_f32x4(vacc1x4567, wasm_v128_andnot(va1, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3); in xnn_f32_igemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma() 243 …uiltin_wasm_fma_f32x4(vacc2x4567, wasm_v128_andnot(va2, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3); in xnn_f32_igemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma() 244 …uiltin_wasm_fma_f32x4(vacc3x4567, wasm_v128_andnot(va3, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3); in xnn_f32_igemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma()
|
D | 5x8s4-minmax-wasmrelaxedsimd-fma.c | 180 const v128_t vb4567c3 = wasm_v128_load(w + 28); in xnn_f32_igemm_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma() local 187 vacc0x4567 = __builtin_wasm_fma_f32x4(vacc0x4567, va0, vb4567c3); in xnn_f32_igemm_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma() 188 vacc1x4567 = __builtin_wasm_fma_f32x4(vacc1x4567, va1, vb4567c3); in xnn_f32_igemm_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma() 189 vacc2x4567 = __builtin_wasm_fma_f32x4(vacc2x4567, va2, vb4567c3); in xnn_f32_igemm_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma() 190 vacc3x4567 = __builtin_wasm_fma_f32x4(vacc3x4567, va3, vb4567c3); in xnn_f32_igemm_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma() 191 vacc4x4567 = __builtin_wasm_fma_f32x4(vacc4x4567, va4, vb4567c3); in xnn_f32_igemm_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma() 272 const v128_t vb4567c3 = wasm_v128_load(w + 28); in xnn_f32_igemm_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma() local 279 …uiltin_wasm_fma_f32x4(vacc0x4567, wasm_v128_andnot(va0, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3); in xnn_f32_igemm_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma() 280 …uiltin_wasm_fma_f32x4(vacc1x4567, wasm_v128_andnot(va1, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3); in xnn_f32_igemm_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma() 281 …uiltin_wasm_fma_f32x4(vacc2x4567, wasm_v128_andnot(va2, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3); in xnn_f32_igemm_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma() [all …]
|
D | 5x8s4-relu-wasmsimd.c | 178 const v128_t vb4567c3 = wasm_v128_load(w + 28); in xnn_f32_igemm_relu_ukernel_5x8s4__wasmsimd() local 185 vacc0x4567 = wasm_f32x4_add(vacc0x4567, wasm_f32x4_mul(va0, vb4567c3)); in xnn_f32_igemm_relu_ukernel_5x8s4__wasmsimd() 186 vacc1x4567 = wasm_f32x4_add(vacc1x4567, wasm_f32x4_mul(va1, vb4567c3)); in xnn_f32_igemm_relu_ukernel_5x8s4__wasmsimd() 187 vacc2x4567 = wasm_f32x4_add(vacc2x4567, wasm_f32x4_mul(va2, vb4567c3)); in xnn_f32_igemm_relu_ukernel_5x8s4__wasmsimd() 188 vacc3x4567 = wasm_f32x4_add(vacc3x4567, wasm_f32x4_mul(va3, vb4567c3)); in xnn_f32_igemm_relu_ukernel_5x8s4__wasmsimd() 189 vacc4x4567 = wasm_f32x4_add(vacc4x4567, wasm_f32x4_mul(va4, vb4567c3)); in xnn_f32_igemm_relu_ukernel_5x8s4__wasmsimd() 270 const v128_t vb4567c3 = wasm_v128_load(w + 28); in xnn_f32_igemm_relu_ukernel_5x8s4__wasmsimd() local 277 …4_add(vacc0x4567, wasm_f32x4_mul(wasm_v128_andnot(va0, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3)); in xnn_f32_igemm_relu_ukernel_5x8s4__wasmsimd() 278 …4_add(vacc1x4567, wasm_f32x4_mul(wasm_v128_andnot(va1, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3)); in xnn_f32_igemm_relu_ukernel_5x8s4__wasmsimd() 279 …4_add(vacc2x4567, wasm_f32x4_mul(wasm_v128_andnot(va2, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3)); in xnn_f32_igemm_relu_ukernel_5x8s4__wasmsimd() [all …]
|
D | 5x8s4-wasmsimd.c | 178 const v128_t vb4567c3 = wasm_v128_load(w + 28); in xnn_f32_igemm_ukernel_5x8s4__wasmsimd() local 185 vacc0x4567 = wasm_f32x4_add(vacc0x4567, wasm_f32x4_mul(va0, vb4567c3)); in xnn_f32_igemm_ukernel_5x8s4__wasmsimd() 186 vacc1x4567 = wasm_f32x4_add(vacc1x4567, wasm_f32x4_mul(va1, vb4567c3)); in xnn_f32_igemm_ukernel_5x8s4__wasmsimd() 187 vacc2x4567 = wasm_f32x4_add(vacc2x4567, wasm_f32x4_mul(va2, vb4567c3)); in xnn_f32_igemm_ukernel_5x8s4__wasmsimd() 188 vacc3x4567 = wasm_f32x4_add(vacc3x4567, wasm_f32x4_mul(va3, vb4567c3)); in xnn_f32_igemm_ukernel_5x8s4__wasmsimd() 189 vacc4x4567 = wasm_f32x4_add(vacc4x4567, wasm_f32x4_mul(va4, vb4567c3)); in xnn_f32_igemm_ukernel_5x8s4__wasmsimd() 270 const v128_t vb4567c3 = wasm_v128_load(w + 28); in xnn_f32_igemm_ukernel_5x8s4__wasmsimd() local 277 …4_add(vacc0x4567, wasm_f32x4_mul(wasm_v128_andnot(va0, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3)); in xnn_f32_igemm_ukernel_5x8s4__wasmsimd() 278 …4_add(vacc1x4567, wasm_f32x4_mul(wasm_v128_andnot(va1, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3)); in xnn_f32_igemm_ukernel_5x8s4__wasmsimd() 279 …4_add(vacc2x4567, wasm_f32x4_mul(wasm_v128_andnot(va2, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3)); in xnn_f32_igemm_ukernel_5x8s4__wasmsimd() [all …]
|
D | 5x8s4-relu-wasmrelaxedsimd-fma.c | 178 const v128_t vb4567c3 = wasm_v128_load(w + 28); in xnn_f32_igemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma() local 185 vacc0x4567 = __builtin_wasm_fma_f32x4(vacc0x4567, va0, vb4567c3); in xnn_f32_igemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma() 186 vacc1x4567 = __builtin_wasm_fma_f32x4(vacc1x4567, va1, vb4567c3); in xnn_f32_igemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma() 187 vacc2x4567 = __builtin_wasm_fma_f32x4(vacc2x4567, va2, vb4567c3); in xnn_f32_igemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma() 188 vacc3x4567 = __builtin_wasm_fma_f32x4(vacc3x4567, va3, vb4567c3); in xnn_f32_igemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma() 189 vacc4x4567 = __builtin_wasm_fma_f32x4(vacc4x4567, va4, vb4567c3); in xnn_f32_igemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma() 270 const v128_t vb4567c3 = wasm_v128_load(w + 28); in xnn_f32_igemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma() local 277 …uiltin_wasm_fma_f32x4(vacc0x4567, wasm_v128_andnot(va0, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3); in xnn_f32_igemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma() 278 …uiltin_wasm_fma_f32x4(vacc1x4567, wasm_v128_andnot(va1, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3); in xnn_f32_igemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma() 279 …uiltin_wasm_fma_f32x4(vacc2x4567, wasm_v128_andnot(va2, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3); in xnn_f32_igemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma() [all …]
|
/external/XNNPACK/src/f32-gemm/gen/ |
D | 5x8s4-wasmrelaxedsimd-fma.c | 153 const v128_t vb4567c3 = wasm_v128_load(w + 28); in xnn_f32_gemm_ukernel_5x8s4__wasmrelaxedsimd_fma() local 160 vacc0x4567 = __builtin_wasm_fma_f32x4(vacc0x4567, va0, vb4567c3); in xnn_f32_gemm_ukernel_5x8s4__wasmrelaxedsimd_fma() 161 vacc1x4567 = __builtin_wasm_fma_f32x4(vacc1x4567, va1, vb4567c3); in xnn_f32_gemm_ukernel_5x8s4__wasmrelaxedsimd_fma() 162 vacc2x4567 = __builtin_wasm_fma_f32x4(vacc2x4567, va2, vb4567c3); in xnn_f32_gemm_ukernel_5x8s4__wasmrelaxedsimd_fma() 163 vacc3x4567 = __builtin_wasm_fma_f32x4(vacc3x4567, va3, vb4567c3); in xnn_f32_gemm_ukernel_5x8s4__wasmrelaxedsimd_fma() 164 vacc4x4567 = __builtin_wasm_fma_f32x4(vacc4x4567, va4, vb4567c3); in xnn_f32_gemm_ukernel_5x8s4__wasmrelaxedsimd_fma() 245 const v128_t vb4567c3 = wasm_v128_load(w + 28); in xnn_f32_gemm_ukernel_5x8s4__wasmrelaxedsimd_fma() local 252 …uiltin_wasm_fma_f32x4(vacc0x4567, wasm_v128_andnot(va0, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3); in xnn_f32_gemm_ukernel_5x8s4__wasmrelaxedsimd_fma() 253 …uiltin_wasm_fma_f32x4(vacc1x4567, wasm_v128_andnot(va1, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3); in xnn_f32_gemm_ukernel_5x8s4__wasmrelaxedsimd_fma() 254 …uiltin_wasm_fma_f32x4(vacc2x4567, wasm_v128_andnot(va2, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3); in xnn_f32_gemm_ukernel_5x8s4__wasmrelaxedsimd_fma() [all …]
|
D | 5x8s4-relu-wasmrelaxedsimd-fma.c | 153 const v128_t vb4567c3 = wasm_v128_load(w + 28); in xnn_f32_gemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma() local 160 vacc0x4567 = __builtin_wasm_fma_f32x4(vacc0x4567, va0, vb4567c3); in xnn_f32_gemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma() 161 vacc1x4567 = __builtin_wasm_fma_f32x4(vacc1x4567, va1, vb4567c3); in xnn_f32_gemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma() 162 vacc2x4567 = __builtin_wasm_fma_f32x4(vacc2x4567, va2, vb4567c3); in xnn_f32_gemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma() 163 vacc3x4567 = __builtin_wasm_fma_f32x4(vacc3x4567, va3, vb4567c3); in xnn_f32_gemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma() 164 vacc4x4567 = __builtin_wasm_fma_f32x4(vacc4x4567, va4, vb4567c3); in xnn_f32_gemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma() 245 const v128_t vb4567c3 = wasm_v128_load(w + 28); in xnn_f32_gemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma() local 252 …uiltin_wasm_fma_f32x4(vacc0x4567, wasm_v128_andnot(va0, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3); in xnn_f32_gemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma() 253 …uiltin_wasm_fma_f32x4(vacc1x4567, wasm_v128_andnot(va1, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3); in xnn_f32_gemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma() 254 …uiltin_wasm_fma_f32x4(vacc2x4567, wasm_v128_andnot(va2, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3); in xnn_f32_gemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma() [all …]
|
D | 6x8s4-relu-wasmrelaxedsimd-fma.c | 172 const v128_t vb4567c3 = wasm_v128_load(w + 28); in xnn_f32_gemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma() local 180 vacc0x4567 = __builtin_wasm_fma_f32x4(vacc0x4567, va0, vb4567c3); in xnn_f32_gemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma() 181 vacc1x4567 = __builtin_wasm_fma_f32x4(vacc1x4567, va1, vb4567c3); in xnn_f32_gemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma() 182 vacc2x4567 = __builtin_wasm_fma_f32x4(vacc2x4567, va2, vb4567c3); in xnn_f32_gemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma() 183 vacc3x4567 = __builtin_wasm_fma_f32x4(vacc3x4567, va3, vb4567c3); in xnn_f32_gemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma() 184 vacc4x4567 = __builtin_wasm_fma_f32x4(vacc4x4567, va4, vb4567c3); in xnn_f32_gemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma() 185 vacc5x4567 = __builtin_wasm_fma_f32x4(vacc5x4567, va5, vb4567c3); in xnn_f32_gemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma() 277 const v128_t vb4567c3 = wasm_v128_load(w + 28); in xnn_f32_gemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma() local 285 …uiltin_wasm_fma_f32x4(vacc0x4567, wasm_v128_andnot(va0, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3); in xnn_f32_gemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma() 286 …uiltin_wasm_fma_f32x4(vacc1x4567, wasm_v128_andnot(va1, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3); in xnn_f32_gemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma() [all …]
|
D | 6x8s4-wasmsimd.c | 172 const v128_t vb4567c3 = wasm_v128_load(w + 28); in xnn_f32_gemm_ukernel_6x8s4__wasmsimd() local 180 vacc0x4567 = wasm_f32x4_add(vacc0x4567, wasm_f32x4_mul(va0, vb4567c3)); in xnn_f32_gemm_ukernel_6x8s4__wasmsimd() 181 vacc1x4567 = wasm_f32x4_add(vacc1x4567, wasm_f32x4_mul(va1, vb4567c3)); in xnn_f32_gemm_ukernel_6x8s4__wasmsimd() 182 vacc2x4567 = wasm_f32x4_add(vacc2x4567, wasm_f32x4_mul(va2, vb4567c3)); in xnn_f32_gemm_ukernel_6x8s4__wasmsimd() 183 vacc3x4567 = wasm_f32x4_add(vacc3x4567, wasm_f32x4_mul(va3, vb4567c3)); in xnn_f32_gemm_ukernel_6x8s4__wasmsimd() 184 vacc4x4567 = wasm_f32x4_add(vacc4x4567, wasm_f32x4_mul(va4, vb4567c3)); in xnn_f32_gemm_ukernel_6x8s4__wasmsimd() 185 vacc5x4567 = wasm_f32x4_add(vacc5x4567, wasm_f32x4_mul(va5, vb4567c3)); in xnn_f32_gemm_ukernel_6x8s4__wasmsimd() 277 const v128_t vb4567c3 = wasm_v128_load(w + 28); in xnn_f32_gemm_ukernel_6x8s4__wasmsimd() local 285 …4_add(vacc0x4567, wasm_f32x4_mul(wasm_v128_andnot(va0, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3)); in xnn_f32_gemm_ukernel_6x8s4__wasmsimd() 286 …4_add(vacc1x4567, wasm_f32x4_mul(wasm_v128_andnot(va1, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3)); in xnn_f32_gemm_ukernel_6x8s4__wasmsimd() [all …]
|
D | 6x8s4-wasmrelaxedsimd-fma.c | 172 const v128_t vb4567c3 = wasm_v128_load(w + 28); in xnn_f32_gemm_ukernel_6x8s4__wasmrelaxedsimd_fma() local 180 vacc0x4567 = __builtin_wasm_fma_f32x4(vacc0x4567, va0, vb4567c3); in xnn_f32_gemm_ukernel_6x8s4__wasmrelaxedsimd_fma() 181 vacc1x4567 = __builtin_wasm_fma_f32x4(vacc1x4567, va1, vb4567c3); in xnn_f32_gemm_ukernel_6x8s4__wasmrelaxedsimd_fma() 182 vacc2x4567 = __builtin_wasm_fma_f32x4(vacc2x4567, va2, vb4567c3); in xnn_f32_gemm_ukernel_6x8s4__wasmrelaxedsimd_fma() 183 vacc3x4567 = __builtin_wasm_fma_f32x4(vacc3x4567, va3, vb4567c3); in xnn_f32_gemm_ukernel_6x8s4__wasmrelaxedsimd_fma() 184 vacc4x4567 = __builtin_wasm_fma_f32x4(vacc4x4567, va4, vb4567c3); in xnn_f32_gemm_ukernel_6x8s4__wasmrelaxedsimd_fma() 185 vacc5x4567 = __builtin_wasm_fma_f32x4(vacc5x4567, va5, vb4567c3); in xnn_f32_gemm_ukernel_6x8s4__wasmrelaxedsimd_fma() 277 const v128_t vb4567c3 = wasm_v128_load(w + 28); in xnn_f32_gemm_ukernel_6x8s4__wasmrelaxedsimd_fma() local 285 …uiltin_wasm_fma_f32x4(vacc0x4567, wasm_v128_andnot(va0, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3); in xnn_f32_gemm_ukernel_6x8s4__wasmrelaxedsimd_fma() 286 …uiltin_wasm_fma_f32x4(vacc1x4567, wasm_v128_andnot(va1, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3); in xnn_f32_gemm_ukernel_6x8s4__wasmrelaxedsimd_fma() [all …]
|
D | 6x8s4-minmax-wasmrelaxedsimd-fma.c | 174 const v128_t vb4567c3 = wasm_v128_load(w + 28); in xnn_f32_gemm_minmax_ukernel_6x8s4__wasmrelaxedsimd_fma() local 182 vacc0x4567 = __builtin_wasm_fma_f32x4(vacc0x4567, va0, vb4567c3); in xnn_f32_gemm_minmax_ukernel_6x8s4__wasmrelaxedsimd_fma() 183 vacc1x4567 = __builtin_wasm_fma_f32x4(vacc1x4567, va1, vb4567c3); in xnn_f32_gemm_minmax_ukernel_6x8s4__wasmrelaxedsimd_fma() 184 vacc2x4567 = __builtin_wasm_fma_f32x4(vacc2x4567, va2, vb4567c3); in xnn_f32_gemm_minmax_ukernel_6x8s4__wasmrelaxedsimd_fma() 185 vacc3x4567 = __builtin_wasm_fma_f32x4(vacc3x4567, va3, vb4567c3); in xnn_f32_gemm_minmax_ukernel_6x8s4__wasmrelaxedsimd_fma() 186 vacc4x4567 = __builtin_wasm_fma_f32x4(vacc4x4567, va4, vb4567c3); in xnn_f32_gemm_minmax_ukernel_6x8s4__wasmrelaxedsimd_fma() 187 vacc5x4567 = __builtin_wasm_fma_f32x4(vacc5x4567, va5, vb4567c3); in xnn_f32_gemm_minmax_ukernel_6x8s4__wasmrelaxedsimd_fma() 279 const v128_t vb4567c3 = wasm_v128_load(w + 28); in xnn_f32_gemm_minmax_ukernel_6x8s4__wasmrelaxedsimd_fma() local 287 …uiltin_wasm_fma_f32x4(vacc0x4567, wasm_v128_andnot(va0, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3); in xnn_f32_gemm_minmax_ukernel_6x8s4__wasmrelaxedsimd_fma() 288 …uiltin_wasm_fma_f32x4(vacc1x4567, wasm_v128_andnot(va1, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3); in xnn_f32_gemm_minmax_ukernel_6x8s4__wasmrelaxedsimd_fma() [all …]
|
D | 4x8s4-wasmrelaxedsimd-fma.c | 134 const v128_t vb4567c3 = wasm_v128_load(w + 28); in xnn_f32_gemm_ukernel_4x8s4__wasmrelaxedsimd_fma() local 140 vacc0x4567 = __builtin_wasm_fma_f32x4(vacc0x4567, va0, vb4567c3); in xnn_f32_gemm_ukernel_4x8s4__wasmrelaxedsimd_fma() 141 vacc1x4567 = __builtin_wasm_fma_f32x4(vacc1x4567, va1, vb4567c3); in xnn_f32_gemm_ukernel_4x8s4__wasmrelaxedsimd_fma() 142 vacc2x4567 = __builtin_wasm_fma_f32x4(vacc2x4567, va2, vb4567c3); in xnn_f32_gemm_ukernel_4x8s4__wasmrelaxedsimd_fma() 143 vacc3x4567 = __builtin_wasm_fma_f32x4(vacc3x4567, va3, vb4567c3); in xnn_f32_gemm_ukernel_4x8s4__wasmrelaxedsimd_fma() 213 const v128_t vb4567c3 = wasm_v128_load(w + 28); in xnn_f32_gemm_ukernel_4x8s4__wasmrelaxedsimd_fma() local 219 …uiltin_wasm_fma_f32x4(vacc0x4567, wasm_v128_andnot(va0, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3); in xnn_f32_gemm_ukernel_4x8s4__wasmrelaxedsimd_fma() 220 …uiltin_wasm_fma_f32x4(vacc1x4567, wasm_v128_andnot(va1, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3); in xnn_f32_gemm_ukernel_4x8s4__wasmrelaxedsimd_fma() 221 …uiltin_wasm_fma_f32x4(vacc2x4567, wasm_v128_andnot(va2, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3); in xnn_f32_gemm_ukernel_4x8s4__wasmrelaxedsimd_fma() 222 …uiltin_wasm_fma_f32x4(vacc3x4567, wasm_v128_andnot(va3, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3); in xnn_f32_gemm_ukernel_4x8s4__wasmrelaxedsimd_fma()
|
D | 3x8s4-wasmrelaxedsimd-fma.c | 115 const v128_t vb4567c3 = wasm_v128_load(w + 28); in xnn_f32_gemm_ukernel_3x8s4__wasmrelaxedsimd_fma() local 120 vacc0x4567 = __builtin_wasm_fma_f32x4(vacc0x4567, va0, vb4567c3); in xnn_f32_gemm_ukernel_3x8s4__wasmrelaxedsimd_fma() 121 vacc1x4567 = __builtin_wasm_fma_f32x4(vacc1x4567, va1, vb4567c3); in xnn_f32_gemm_ukernel_3x8s4__wasmrelaxedsimd_fma() 122 vacc2x4567 = __builtin_wasm_fma_f32x4(vacc2x4567, va2, vb4567c3); in xnn_f32_gemm_ukernel_3x8s4__wasmrelaxedsimd_fma() 181 const v128_t vb4567c3 = wasm_v128_load(w + 28); in xnn_f32_gemm_ukernel_3x8s4__wasmrelaxedsimd_fma() local 186 …uiltin_wasm_fma_f32x4(vacc0x4567, wasm_v128_andnot(va0, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3); in xnn_f32_gemm_ukernel_3x8s4__wasmrelaxedsimd_fma() 187 …uiltin_wasm_fma_f32x4(vacc1x4567, wasm_v128_andnot(va1, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3); in xnn_f32_gemm_ukernel_3x8s4__wasmrelaxedsimd_fma() 188 …uiltin_wasm_fma_f32x4(vacc2x4567, wasm_v128_andnot(va2, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3); in xnn_f32_gemm_ukernel_3x8s4__wasmrelaxedsimd_fma()
|
D | 4x8s4-wasmsimd.c | 134 const v128_t vb4567c3 = wasm_v128_load(w + 28); in xnn_f32_gemm_ukernel_4x8s4__wasmsimd() local 140 vacc0x4567 = wasm_f32x4_add(vacc0x4567, wasm_f32x4_mul(va0, vb4567c3)); in xnn_f32_gemm_ukernel_4x8s4__wasmsimd() 141 vacc1x4567 = wasm_f32x4_add(vacc1x4567, wasm_f32x4_mul(va1, vb4567c3)); in xnn_f32_gemm_ukernel_4x8s4__wasmsimd() 142 vacc2x4567 = wasm_f32x4_add(vacc2x4567, wasm_f32x4_mul(va2, vb4567c3)); in xnn_f32_gemm_ukernel_4x8s4__wasmsimd() 143 vacc3x4567 = wasm_f32x4_add(vacc3x4567, wasm_f32x4_mul(va3, vb4567c3)); in xnn_f32_gemm_ukernel_4x8s4__wasmsimd() 213 const v128_t vb4567c3 = wasm_v128_load(w + 28); in xnn_f32_gemm_ukernel_4x8s4__wasmsimd() local 219 …4_add(vacc0x4567, wasm_f32x4_mul(wasm_v128_andnot(va0, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3)); in xnn_f32_gemm_ukernel_4x8s4__wasmsimd() 220 …4_add(vacc1x4567, wasm_f32x4_mul(wasm_v128_andnot(va1, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3)); in xnn_f32_gemm_ukernel_4x8s4__wasmsimd() 221 …4_add(vacc2x4567, wasm_f32x4_mul(wasm_v128_andnot(va2, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3)); in xnn_f32_gemm_ukernel_4x8s4__wasmsimd() 222 …4_add(vacc3x4567, wasm_f32x4_mul(wasm_v128_andnot(va3, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3)); in xnn_f32_gemm_ukernel_4x8s4__wasmsimd()
|
D | 4x8s4-relu-wasmrelaxedsimd-fma.c | 134 const v128_t vb4567c3 = wasm_v128_load(w + 28); in xnn_f32_gemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma() local 140 vacc0x4567 = __builtin_wasm_fma_f32x4(vacc0x4567, va0, vb4567c3); in xnn_f32_gemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma() 141 vacc1x4567 = __builtin_wasm_fma_f32x4(vacc1x4567, va1, vb4567c3); in xnn_f32_gemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma() 142 vacc2x4567 = __builtin_wasm_fma_f32x4(vacc2x4567, va2, vb4567c3); in xnn_f32_gemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma() 143 vacc3x4567 = __builtin_wasm_fma_f32x4(vacc3x4567, va3, vb4567c3); in xnn_f32_gemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma() 213 const v128_t vb4567c3 = wasm_v128_load(w + 28); in xnn_f32_gemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma() local 219 …uiltin_wasm_fma_f32x4(vacc0x4567, wasm_v128_andnot(va0, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3); in xnn_f32_gemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma() 220 …uiltin_wasm_fma_f32x4(vacc1x4567, wasm_v128_andnot(va1, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3); in xnn_f32_gemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma() 221 …uiltin_wasm_fma_f32x4(vacc2x4567, wasm_v128_andnot(va2, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3); in xnn_f32_gemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma() 222 …uiltin_wasm_fma_f32x4(vacc3x4567, wasm_v128_andnot(va3, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3); in xnn_f32_gemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma()
|
D | 5x8s4-minmax-wasmrelaxedsimd.c | 155 const v128_t vb4567c3 = wasm_v128_load(w + 28); in xnn_f32_gemm_minmax_ukernel_5x8s4__wasmrelaxedsimd() local 162 vacc0x4567 = wasm_f32x4_add(vacc0x4567, wasm_f32x4_mul(va0, vb4567c3)); in xnn_f32_gemm_minmax_ukernel_5x8s4__wasmrelaxedsimd() 163 vacc1x4567 = wasm_f32x4_add(vacc1x4567, wasm_f32x4_mul(va1, vb4567c3)); in xnn_f32_gemm_minmax_ukernel_5x8s4__wasmrelaxedsimd() 164 vacc2x4567 = wasm_f32x4_add(vacc2x4567, wasm_f32x4_mul(va2, vb4567c3)); in xnn_f32_gemm_minmax_ukernel_5x8s4__wasmrelaxedsimd() 165 vacc3x4567 = wasm_f32x4_add(vacc3x4567, wasm_f32x4_mul(va3, vb4567c3)); in xnn_f32_gemm_minmax_ukernel_5x8s4__wasmrelaxedsimd() 166 vacc4x4567 = wasm_f32x4_add(vacc4x4567, wasm_f32x4_mul(va4, vb4567c3)); in xnn_f32_gemm_minmax_ukernel_5x8s4__wasmrelaxedsimd() 247 const v128_t vb4567c3 = wasm_v128_load(w + 28); in xnn_f32_gemm_minmax_ukernel_5x8s4__wasmrelaxedsimd() local 254 …4_add(vacc0x4567, wasm_f32x4_mul(wasm_v128_andnot(va0, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3)); in xnn_f32_gemm_minmax_ukernel_5x8s4__wasmrelaxedsimd() 255 …4_add(vacc1x4567, wasm_f32x4_mul(wasm_v128_andnot(va1, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3)); in xnn_f32_gemm_minmax_ukernel_5x8s4__wasmrelaxedsimd() 256 …4_add(vacc2x4567, wasm_f32x4_mul(wasm_v128_andnot(va2, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3)); in xnn_f32_gemm_minmax_ukernel_5x8s4__wasmrelaxedsimd() [all …]
|
D | 5x8s4-minmax-wasmsimd-arm.c | 155 const v128_t vb4567c3 = wasm_v128_load(w + 28); in xnn_f32_gemm_minmax_ukernel_5x8s4__wasmsimd_arm() local 162 vacc0x4567 = wasm_f32x4_add(vacc0x4567, wasm_f32x4_mul(va0, vb4567c3)); in xnn_f32_gemm_minmax_ukernel_5x8s4__wasmsimd_arm() 163 vacc1x4567 = wasm_f32x4_add(vacc1x4567, wasm_f32x4_mul(va1, vb4567c3)); in xnn_f32_gemm_minmax_ukernel_5x8s4__wasmsimd_arm() 164 vacc2x4567 = wasm_f32x4_add(vacc2x4567, wasm_f32x4_mul(va2, vb4567c3)); in xnn_f32_gemm_minmax_ukernel_5x8s4__wasmsimd_arm() 165 vacc3x4567 = wasm_f32x4_add(vacc3x4567, wasm_f32x4_mul(va3, vb4567c3)); in xnn_f32_gemm_minmax_ukernel_5x8s4__wasmsimd_arm() 166 vacc4x4567 = wasm_f32x4_add(vacc4x4567, wasm_f32x4_mul(va4, vb4567c3)); in xnn_f32_gemm_minmax_ukernel_5x8s4__wasmsimd_arm() 247 const v128_t vb4567c3 = wasm_v128_load(w + 28); in xnn_f32_gemm_minmax_ukernel_5x8s4__wasmsimd_arm() local 254 …4_add(vacc0x4567, wasm_f32x4_mul(wasm_v128_andnot(va0, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3)); in xnn_f32_gemm_minmax_ukernel_5x8s4__wasmsimd_arm() 255 …4_add(vacc1x4567, wasm_f32x4_mul(wasm_v128_andnot(va1, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3)); in xnn_f32_gemm_minmax_ukernel_5x8s4__wasmsimd_arm() 256 …4_add(vacc2x4567, wasm_f32x4_mul(wasm_v128_andnot(va2, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3)); in xnn_f32_gemm_minmax_ukernel_5x8s4__wasmsimd_arm() [all …]
|
D | 5x8s4-relu-wasmsimd.c | 153 const v128_t vb4567c3 = wasm_v128_load(w + 28); in xnn_f32_gemm_relu_ukernel_5x8s4__wasmsimd() local 160 vacc0x4567 = wasm_f32x4_add(vacc0x4567, wasm_f32x4_mul(va0, vb4567c3)); in xnn_f32_gemm_relu_ukernel_5x8s4__wasmsimd() 161 vacc1x4567 = wasm_f32x4_add(vacc1x4567, wasm_f32x4_mul(va1, vb4567c3)); in xnn_f32_gemm_relu_ukernel_5x8s4__wasmsimd() 162 vacc2x4567 = wasm_f32x4_add(vacc2x4567, wasm_f32x4_mul(va2, vb4567c3)); in xnn_f32_gemm_relu_ukernel_5x8s4__wasmsimd() 163 vacc3x4567 = wasm_f32x4_add(vacc3x4567, wasm_f32x4_mul(va3, vb4567c3)); in xnn_f32_gemm_relu_ukernel_5x8s4__wasmsimd() 164 vacc4x4567 = wasm_f32x4_add(vacc4x4567, wasm_f32x4_mul(va4, vb4567c3)); in xnn_f32_gemm_relu_ukernel_5x8s4__wasmsimd() 245 const v128_t vb4567c3 = wasm_v128_load(w + 28); in xnn_f32_gemm_relu_ukernel_5x8s4__wasmsimd() local 252 …4_add(vacc0x4567, wasm_f32x4_mul(wasm_v128_andnot(va0, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3)); in xnn_f32_gemm_relu_ukernel_5x8s4__wasmsimd() 253 …4_add(vacc1x4567, wasm_f32x4_mul(wasm_v128_andnot(va1, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3)); in xnn_f32_gemm_relu_ukernel_5x8s4__wasmsimd() 254 …4_add(vacc2x4567, wasm_f32x4_mul(wasm_v128_andnot(va2, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3)); in xnn_f32_gemm_relu_ukernel_5x8s4__wasmsimd() [all …]
|
/external/XNNPACK/src/f32-gemm/gen-inc/ |
D | 5x8s4inc-minmax-wasmsimd-x86.c | 157 const v128_t vb4567c3 = wasm_v128_load(w + 28); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_x86() local 164 vacc0x4567 = wasm_f32x4_add(vacc0x4567, wasm_f32x4_mul(va0, vb4567c3)); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_x86() 165 vacc1x4567 = wasm_f32x4_add(vacc1x4567, wasm_f32x4_mul(va1, vb4567c3)); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_x86() 166 vacc2x4567 = wasm_f32x4_add(vacc2x4567, wasm_f32x4_mul(va2, vb4567c3)); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_x86() 167 vacc3x4567 = wasm_f32x4_add(vacc3x4567, wasm_f32x4_mul(va3, vb4567c3)); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_x86() 168 vacc4x4567 = wasm_f32x4_add(vacc4x4567, wasm_f32x4_mul(va4, vb4567c3)); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_x86() 249 const v128_t vb4567c3 = wasm_v128_load(w + 28); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_x86() local 256 …4_add(vacc0x4567, wasm_f32x4_mul(wasm_v128_andnot(va0, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3)); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_x86() 257 …4_add(vacc1x4567, wasm_f32x4_mul(wasm_v128_andnot(va1, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3)); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_x86() 258 …4_add(vacc2x4567, wasm_f32x4_mul(wasm_v128_andnot(va2, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3)); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_x86() [all …]
|
D | 5x8s4inc-minmax-wasmrelaxedsimd-fma.c | 157 const v128_t vb4567c3 = wasm_v128_load(w + 28); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma() local 164 vacc0x4567 = __builtin_wasm_fma_f32x4(vacc0x4567, va0, vb4567c3); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma() 165 vacc1x4567 = __builtin_wasm_fma_f32x4(vacc1x4567, va1, vb4567c3); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma() 166 vacc2x4567 = __builtin_wasm_fma_f32x4(vacc2x4567, va2, vb4567c3); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma() 167 vacc3x4567 = __builtin_wasm_fma_f32x4(vacc3x4567, va3, vb4567c3); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma() 168 vacc4x4567 = __builtin_wasm_fma_f32x4(vacc4x4567, va4, vb4567c3); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma() 249 const v128_t vb4567c3 = wasm_v128_load(w + 28); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma() local 256 …uiltin_wasm_fma_f32x4(vacc0x4567, wasm_v128_andnot(va0, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma() 257 …uiltin_wasm_fma_f32x4(vacc1x4567, wasm_v128_andnot(va1, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma() 258 …uiltin_wasm_fma_f32x4(vacc2x4567, wasm_v128_andnot(va2, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma() [all …]
|
D | 5x8s4inc-minmax-wasmsimd-arm.c | 157 const v128_t vb4567c3 = wasm_v128_load(w + 28); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_arm() local 164 vacc0x4567 = wasm_f32x4_add(vacc0x4567, wasm_f32x4_mul(va0, vb4567c3)); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_arm() 165 vacc1x4567 = wasm_f32x4_add(vacc1x4567, wasm_f32x4_mul(va1, vb4567c3)); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_arm() 166 vacc2x4567 = wasm_f32x4_add(vacc2x4567, wasm_f32x4_mul(va2, vb4567c3)); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_arm() 167 vacc3x4567 = wasm_f32x4_add(vacc3x4567, wasm_f32x4_mul(va3, vb4567c3)); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_arm() 168 vacc4x4567 = wasm_f32x4_add(vacc4x4567, wasm_f32x4_mul(va4, vb4567c3)); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_arm() 249 const v128_t vb4567c3 = wasm_v128_load(w + 28); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_arm() local 256 …4_add(vacc0x4567, wasm_f32x4_mul(wasm_v128_andnot(va0, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3)); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_arm() 257 …4_add(vacc1x4567, wasm_f32x4_mul(wasm_v128_andnot(va1, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3)); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_arm() 258 …4_add(vacc2x4567, wasm_f32x4_mul(wasm_v128_andnot(va2, wasm_f32x4_eq(vb4567c3, vzero)), vb4567c3)); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_arm() [all …]
|