/external/XNNPACK/src/f32-igemm/gen/ |
D | 5x8s4-wasmrelaxedsimd-fma.c | 117 const v128_t vb0123c0 = wasm_v128_load(w + 0); in xnn_f32_igemm_ukernel_5x8s4__wasmrelaxedsimd_fma() local 120 vacc0x0123 = __builtin_wasm_fma_f32x4(vacc0x0123, va0, vb0123c0); in xnn_f32_igemm_ukernel_5x8s4__wasmrelaxedsimd_fma() 121 vacc1x0123 = __builtin_wasm_fma_f32x4(vacc1x0123, va1, vb0123c0); in xnn_f32_igemm_ukernel_5x8s4__wasmrelaxedsimd_fma() 122 vacc2x0123 = __builtin_wasm_fma_f32x4(vacc2x0123, va2, vb0123c0); in xnn_f32_igemm_ukernel_5x8s4__wasmrelaxedsimd_fma() 123 vacc3x0123 = __builtin_wasm_fma_f32x4(vacc3x0123, va3, vb0123c0); in xnn_f32_igemm_ukernel_5x8s4__wasmrelaxedsimd_fma() 124 vacc4x0123 = __builtin_wasm_fma_f32x4(vacc4x0123, va4, vb0123c0); in xnn_f32_igemm_ukernel_5x8s4__wasmrelaxedsimd_fma() 209 const v128_t vb0123c0 = wasm_v128_load(w + 0); in xnn_f32_igemm_ukernel_5x8s4__wasmrelaxedsimd_fma() local 212 …uiltin_wasm_fma_f32x4(vacc0x0123, wasm_v128_andnot(va0, wasm_f32x4_eq(vb0123c0, vzero)), vb0123c0); in xnn_f32_igemm_ukernel_5x8s4__wasmrelaxedsimd_fma() 213 …uiltin_wasm_fma_f32x4(vacc1x0123, wasm_v128_andnot(va1, wasm_f32x4_eq(vb0123c0, vzero)), vb0123c0); in xnn_f32_igemm_ukernel_5x8s4__wasmrelaxedsimd_fma() 214 …uiltin_wasm_fma_f32x4(vacc2x0123, wasm_v128_andnot(va2, wasm_f32x4_eq(vb0123c0, vzero)), vb0123c0); in xnn_f32_igemm_ukernel_5x8s4__wasmrelaxedsimd_fma() [all …]
|
D | 6x8s4-wasmrelaxedsimd-fma.c | 130 const v128_t vb0123c0 = wasm_v128_load(w + 0); in xnn_f32_igemm_ukernel_6x8s4__wasmrelaxedsimd_fma() local 133 vacc0x0123 = __builtin_wasm_fma_f32x4(vacc0x0123, va0, vb0123c0); in xnn_f32_igemm_ukernel_6x8s4__wasmrelaxedsimd_fma() 134 vacc1x0123 = __builtin_wasm_fma_f32x4(vacc1x0123, va1, vb0123c0); in xnn_f32_igemm_ukernel_6x8s4__wasmrelaxedsimd_fma() 135 vacc2x0123 = __builtin_wasm_fma_f32x4(vacc2x0123, va2, vb0123c0); in xnn_f32_igemm_ukernel_6x8s4__wasmrelaxedsimd_fma() 136 vacc3x0123 = __builtin_wasm_fma_f32x4(vacc3x0123, va3, vb0123c0); in xnn_f32_igemm_ukernel_6x8s4__wasmrelaxedsimd_fma() 137 vacc4x0123 = __builtin_wasm_fma_f32x4(vacc4x0123, va4, vb0123c0); in xnn_f32_igemm_ukernel_6x8s4__wasmrelaxedsimd_fma() 138 vacc5x0123 = __builtin_wasm_fma_f32x4(vacc5x0123, va5, vb0123c0); in xnn_f32_igemm_ukernel_6x8s4__wasmrelaxedsimd_fma() 235 const v128_t vb0123c0 = wasm_v128_load(w + 0); in xnn_f32_igemm_ukernel_6x8s4__wasmrelaxedsimd_fma() local 238 …uiltin_wasm_fma_f32x4(vacc0x0123, wasm_v128_andnot(va0, wasm_f32x4_eq(vb0123c0, vzero)), vb0123c0); in xnn_f32_igemm_ukernel_6x8s4__wasmrelaxedsimd_fma() 239 …uiltin_wasm_fma_f32x4(vacc1x0123, wasm_v128_andnot(va1, wasm_f32x4_eq(vb0123c0, vzero)), vb0123c0); in xnn_f32_igemm_ukernel_6x8s4__wasmrelaxedsimd_fma() [all …]
|
D | 6x8s4-relu-wasmrelaxedsimd-fma.c | 130 const v128_t vb0123c0 = wasm_v128_load(w + 0); in xnn_f32_igemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma() local 133 vacc0x0123 = __builtin_wasm_fma_f32x4(vacc0x0123, va0, vb0123c0); in xnn_f32_igemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma() 134 vacc1x0123 = __builtin_wasm_fma_f32x4(vacc1x0123, va1, vb0123c0); in xnn_f32_igemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma() 135 vacc2x0123 = __builtin_wasm_fma_f32x4(vacc2x0123, va2, vb0123c0); in xnn_f32_igemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma() 136 vacc3x0123 = __builtin_wasm_fma_f32x4(vacc3x0123, va3, vb0123c0); in xnn_f32_igemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma() 137 vacc4x0123 = __builtin_wasm_fma_f32x4(vacc4x0123, va4, vb0123c0); in xnn_f32_igemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma() 138 vacc5x0123 = __builtin_wasm_fma_f32x4(vacc5x0123, va5, vb0123c0); in xnn_f32_igemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma() 235 const v128_t vb0123c0 = wasm_v128_load(w + 0); in xnn_f32_igemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma() local 238 …uiltin_wasm_fma_f32x4(vacc0x0123, wasm_v128_andnot(va0, wasm_f32x4_eq(vb0123c0, vzero)), vb0123c0); in xnn_f32_igemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma() 239 …uiltin_wasm_fma_f32x4(vacc1x0123, wasm_v128_andnot(va1, wasm_f32x4_eq(vb0123c0, vzero)), vb0123c0); in xnn_f32_igemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma() [all …]
|
D | 4x8s4-wasmrelaxedsimd-fma.c | 104 const v128_t vb0123c0 = wasm_v128_load(w + 0); in xnn_f32_igemm_ukernel_4x8s4__wasmrelaxedsimd_fma() local 107 vacc0x0123 = __builtin_wasm_fma_f32x4(vacc0x0123, va0, vb0123c0); in xnn_f32_igemm_ukernel_4x8s4__wasmrelaxedsimd_fma() 108 vacc1x0123 = __builtin_wasm_fma_f32x4(vacc1x0123, va1, vb0123c0); in xnn_f32_igemm_ukernel_4x8s4__wasmrelaxedsimd_fma() 109 vacc2x0123 = __builtin_wasm_fma_f32x4(vacc2x0123, va2, vb0123c0); in xnn_f32_igemm_ukernel_4x8s4__wasmrelaxedsimd_fma() 110 vacc3x0123 = __builtin_wasm_fma_f32x4(vacc3x0123, va3, vb0123c0); in xnn_f32_igemm_ukernel_4x8s4__wasmrelaxedsimd_fma() 183 const v128_t vb0123c0 = wasm_v128_load(w + 0); in xnn_f32_igemm_ukernel_4x8s4__wasmrelaxedsimd_fma() local 186 …uiltin_wasm_fma_f32x4(vacc0x0123, wasm_v128_andnot(va0, wasm_f32x4_eq(vb0123c0, vzero)), vb0123c0); in xnn_f32_igemm_ukernel_4x8s4__wasmrelaxedsimd_fma() 187 …uiltin_wasm_fma_f32x4(vacc1x0123, wasm_v128_andnot(va1, wasm_f32x4_eq(vb0123c0, vzero)), vb0123c0); in xnn_f32_igemm_ukernel_4x8s4__wasmrelaxedsimd_fma() 188 …uiltin_wasm_fma_f32x4(vacc2x0123, wasm_v128_andnot(va2, wasm_f32x4_eq(vb0123c0, vzero)), vb0123c0); in xnn_f32_igemm_ukernel_4x8s4__wasmrelaxedsimd_fma() 189 …uiltin_wasm_fma_f32x4(vacc3x0123, wasm_v128_andnot(va3, wasm_f32x4_eq(vb0123c0, vzero)), vb0123c0); in xnn_f32_igemm_ukernel_4x8s4__wasmrelaxedsimd_fma()
|
D | 4x8s4-relu-wasmrelaxedsimd-fma.c | 104 const v128_t vb0123c0 = wasm_v128_load(w + 0); in xnn_f32_igemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma() local 107 vacc0x0123 = __builtin_wasm_fma_f32x4(vacc0x0123, va0, vb0123c0); in xnn_f32_igemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma() 108 vacc1x0123 = __builtin_wasm_fma_f32x4(vacc1x0123, va1, vb0123c0); in xnn_f32_igemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma() 109 vacc2x0123 = __builtin_wasm_fma_f32x4(vacc2x0123, va2, vb0123c0); in xnn_f32_igemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma() 110 vacc3x0123 = __builtin_wasm_fma_f32x4(vacc3x0123, va3, vb0123c0); in xnn_f32_igemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma() 183 const v128_t vb0123c0 = wasm_v128_load(w + 0); in xnn_f32_igemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma() local 186 …uiltin_wasm_fma_f32x4(vacc0x0123, wasm_v128_andnot(va0, wasm_f32x4_eq(vb0123c0, vzero)), vb0123c0); in xnn_f32_igemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma() 187 …uiltin_wasm_fma_f32x4(vacc1x0123, wasm_v128_andnot(va1, wasm_f32x4_eq(vb0123c0, vzero)), vb0123c0); in xnn_f32_igemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma() 188 …uiltin_wasm_fma_f32x4(vacc2x0123, wasm_v128_andnot(va2, wasm_f32x4_eq(vb0123c0, vzero)), vb0123c0); in xnn_f32_igemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma() 189 …uiltin_wasm_fma_f32x4(vacc3x0123, wasm_v128_andnot(va3, wasm_f32x4_eq(vb0123c0, vzero)), vb0123c0); in xnn_f32_igemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma()
|
D | 5x8s4-minmax-wasmrelaxedsimd-fma.c | 119 const v128_t vb0123c0 = wasm_v128_load(w + 0); in xnn_f32_igemm_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma() local 122 vacc0x0123 = __builtin_wasm_fma_f32x4(vacc0x0123, va0, vb0123c0); in xnn_f32_igemm_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma() 123 vacc1x0123 = __builtin_wasm_fma_f32x4(vacc1x0123, va1, vb0123c0); in xnn_f32_igemm_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma() 124 vacc2x0123 = __builtin_wasm_fma_f32x4(vacc2x0123, va2, vb0123c0); in xnn_f32_igemm_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma() 125 vacc3x0123 = __builtin_wasm_fma_f32x4(vacc3x0123, va3, vb0123c0); in xnn_f32_igemm_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma() 126 vacc4x0123 = __builtin_wasm_fma_f32x4(vacc4x0123, va4, vb0123c0); in xnn_f32_igemm_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma() 211 const v128_t vb0123c0 = wasm_v128_load(w + 0); in xnn_f32_igemm_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma() local 214 …uiltin_wasm_fma_f32x4(vacc0x0123, wasm_v128_andnot(va0, wasm_f32x4_eq(vb0123c0, vzero)), vb0123c0); in xnn_f32_igemm_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma() 215 …uiltin_wasm_fma_f32x4(vacc1x0123, wasm_v128_andnot(va1, wasm_f32x4_eq(vb0123c0, vzero)), vb0123c0); in xnn_f32_igemm_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma() 216 …uiltin_wasm_fma_f32x4(vacc2x0123, wasm_v128_andnot(va2, wasm_f32x4_eq(vb0123c0, vzero)), vb0123c0); in xnn_f32_igemm_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma() [all …]
|
D | 5x8s4-relu-wasmsimd.c | 117 const v128_t vb0123c0 = wasm_v128_load(w + 0); in xnn_f32_igemm_relu_ukernel_5x8s4__wasmsimd() local 120 vacc0x0123 = wasm_f32x4_add(vacc0x0123, wasm_f32x4_mul(va0, vb0123c0)); in xnn_f32_igemm_relu_ukernel_5x8s4__wasmsimd() 121 vacc1x0123 = wasm_f32x4_add(vacc1x0123, wasm_f32x4_mul(va1, vb0123c0)); in xnn_f32_igemm_relu_ukernel_5x8s4__wasmsimd() 122 vacc2x0123 = wasm_f32x4_add(vacc2x0123, wasm_f32x4_mul(va2, vb0123c0)); in xnn_f32_igemm_relu_ukernel_5x8s4__wasmsimd() 123 vacc3x0123 = wasm_f32x4_add(vacc3x0123, wasm_f32x4_mul(va3, vb0123c0)); in xnn_f32_igemm_relu_ukernel_5x8s4__wasmsimd() 124 vacc4x0123 = wasm_f32x4_add(vacc4x0123, wasm_f32x4_mul(va4, vb0123c0)); in xnn_f32_igemm_relu_ukernel_5x8s4__wasmsimd() 209 const v128_t vb0123c0 = wasm_v128_load(w + 0); in xnn_f32_igemm_relu_ukernel_5x8s4__wasmsimd() local 212 …4_add(vacc0x0123, wasm_f32x4_mul(wasm_v128_andnot(va0, wasm_f32x4_eq(vb0123c0, vzero)), vb0123c0)); in xnn_f32_igemm_relu_ukernel_5x8s4__wasmsimd() 213 …4_add(vacc1x0123, wasm_f32x4_mul(wasm_v128_andnot(va1, wasm_f32x4_eq(vb0123c0, vzero)), vb0123c0)); in xnn_f32_igemm_relu_ukernel_5x8s4__wasmsimd() 214 …4_add(vacc2x0123, wasm_f32x4_mul(wasm_v128_andnot(va2, wasm_f32x4_eq(vb0123c0, vzero)), vb0123c0)); in xnn_f32_igemm_relu_ukernel_5x8s4__wasmsimd() [all …]
|
D | 5x8s4-wasmsimd.c | 117 const v128_t vb0123c0 = wasm_v128_load(w + 0); in xnn_f32_igemm_ukernel_5x8s4__wasmsimd() local 120 vacc0x0123 = wasm_f32x4_add(vacc0x0123, wasm_f32x4_mul(va0, vb0123c0)); in xnn_f32_igemm_ukernel_5x8s4__wasmsimd() 121 vacc1x0123 = wasm_f32x4_add(vacc1x0123, wasm_f32x4_mul(va1, vb0123c0)); in xnn_f32_igemm_ukernel_5x8s4__wasmsimd() 122 vacc2x0123 = wasm_f32x4_add(vacc2x0123, wasm_f32x4_mul(va2, vb0123c0)); in xnn_f32_igemm_ukernel_5x8s4__wasmsimd() 123 vacc3x0123 = wasm_f32x4_add(vacc3x0123, wasm_f32x4_mul(va3, vb0123c0)); in xnn_f32_igemm_ukernel_5x8s4__wasmsimd() 124 vacc4x0123 = wasm_f32x4_add(vacc4x0123, wasm_f32x4_mul(va4, vb0123c0)); in xnn_f32_igemm_ukernel_5x8s4__wasmsimd() 209 const v128_t vb0123c0 = wasm_v128_load(w + 0); in xnn_f32_igemm_ukernel_5x8s4__wasmsimd() local 212 …4_add(vacc0x0123, wasm_f32x4_mul(wasm_v128_andnot(va0, wasm_f32x4_eq(vb0123c0, vzero)), vb0123c0)); in xnn_f32_igemm_ukernel_5x8s4__wasmsimd() 213 …4_add(vacc1x0123, wasm_f32x4_mul(wasm_v128_andnot(va1, wasm_f32x4_eq(vb0123c0, vzero)), vb0123c0)); in xnn_f32_igemm_ukernel_5x8s4__wasmsimd() 214 …4_add(vacc2x0123, wasm_f32x4_mul(wasm_v128_andnot(va2, wasm_f32x4_eq(vb0123c0, vzero)), vb0123c0)); in xnn_f32_igemm_ukernel_5x8s4__wasmsimd() [all …]
|
D | 5x8s4-relu-wasmrelaxedsimd-fma.c | 117 const v128_t vb0123c0 = wasm_v128_load(w + 0); in xnn_f32_igemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma() local 120 vacc0x0123 = __builtin_wasm_fma_f32x4(vacc0x0123, va0, vb0123c0); in xnn_f32_igemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma() 121 vacc1x0123 = __builtin_wasm_fma_f32x4(vacc1x0123, va1, vb0123c0); in xnn_f32_igemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma() 122 vacc2x0123 = __builtin_wasm_fma_f32x4(vacc2x0123, va2, vb0123c0); in xnn_f32_igemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma() 123 vacc3x0123 = __builtin_wasm_fma_f32x4(vacc3x0123, va3, vb0123c0); in xnn_f32_igemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma() 124 vacc4x0123 = __builtin_wasm_fma_f32x4(vacc4x0123, va4, vb0123c0); in xnn_f32_igemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma() 209 const v128_t vb0123c0 = wasm_v128_load(w + 0); in xnn_f32_igemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma() local 212 …uiltin_wasm_fma_f32x4(vacc0x0123, wasm_v128_andnot(va0, wasm_f32x4_eq(vb0123c0, vzero)), vb0123c0); in xnn_f32_igemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma() 213 …uiltin_wasm_fma_f32x4(vacc1x0123, wasm_v128_andnot(va1, wasm_f32x4_eq(vb0123c0, vzero)), vb0123c0); in xnn_f32_igemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma() 214 …uiltin_wasm_fma_f32x4(vacc2x0123, wasm_v128_andnot(va2, wasm_f32x4_eq(vb0123c0, vzero)), vb0123c0); in xnn_f32_igemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma() [all …]
|
/external/XNNPACK/src/f32-gemm/gen/ |
D | 5x8s4-wasmrelaxedsimd-fma.c | 92 const v128_t vb0123c0 = wasm_v128_load(w + 0); in xnn_f32_gemm_ukernel_5x8s4__wasmrelaxedsimd_fma() local 95 vacc0x0123 = __builtin_wasm_fma_f32x4(vacc0x0123, va0, vb0123c0); in xnn_f32_gemm_ukernel_5x8s4__wasmrelaxedsimd_fma() 96 vacc1x0123 = __builtin_wasm_fma_f32x4(vacc1x0123, va1, vb0123c0); in xnn_f32_gemm_ukernel_5x8s4__wasmrelaxedsimd_fma() 97 vacc2x0123 = __builtin_wasm_fma_f32x4(vacc2x0123, va2, vb0123c0); in xnn_f32_gemm_ukernel_5x8s4__wasmrelaxedsimd_fma() 98 vacc3x0123 = __builtin_wasm_fma_f32x4(vacc3x0123, va3, vb0123c0); in xnn_f32_gemm_ukernel_5x8s4__wasmrelaxedsimd_fma() 99 vacc4x0123 = __builtin_wasm_fma_f32x4(vacc4x0123, va4, vb0123c0); in xnn_f32_gemm_ukernel_5x8s4__wasmrelaxedsimd_fma() 184 const v128_t vb0123c0 = wasm_v128_load(w + 0); in xnn_f32_gemm_ukernel_5x8s4__wasmrelaxedsimd_fma() local 187 …uiltin_wasm_fma_f32x4(vacc0x0123, wasm_v128_andnot(va0, wasm_f32x4_eq(vb0123c0, vzero)), vb0123c0); in xnn_f32_gemm_ukernel_5x8s4__wasmrelaxedsimd_fma() 188 …uiltin_wasm_fma_f32x4(vacc1x0123, wasm_v128_andnot(va1, wasm_f32x4_eq(vb0123c0, vzero)), vb0123c0); in xnn_f32_gemm_ukernel_5x8s4__wasmrelaxedsimd_fma() 189 …uiltin_wasm_fma_f32x4(vacc2x0123, wasm_v128_andnot(va2, wasm_f32x4_eq(vb0123c0, vzero)), vb0123c0); in xnn_f32_gemm_ukernel_5x8s4__wasmrelaxedsimd_fma() [all …]
|
D | 5x8s4-relu-wasmrelaxedsimd-fma.c | 92 const v128_t vb0123c0 = wasm_v128_load(w + 0); in xnn_f32_gemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma() local 95 vacc0x0123 = __builtin_wasm_fma_f32x4(vacc0x0123, va0, vb0123c0); in xnn_f32_gemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma() 96 vacc1x0123 = __builtin_wasm_fma_f32x4(vacc1x0123, va1, vb0123c0); in xnn_f32_gemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma() 97 vacc2x0123 = __builtin_wasm_fma_f32x4(vacc2x0123, va2, vb0123c0); in xnn_f32_gemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma() 98 vacc3x0123 = __builtin_wasm_fma_f32x4(vacc3x0123, va3, vb0123c0); in xnn_f32_gemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma() 99 vacc4x0123 = __builtin_wasm_fma_f32x4(vacc4x0123, va4, vb0123c0); in xnn_f32_gemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma() 184 const v128_t vb0123c0 = wasm_v128_load(w + 0); in xnn_f32_gemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma() local 187 …uiltin_wasm_fma_f32x4(vacc0x0123, wasm_v128_andnot(va0, wasm_f32x4_eq(vb0123c0, vzero)), vb0123c0); in xnn_f32_gemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma() 188 …uiltin_wasm_fma_f32x4(vacc1x0123, wasm_v128_andnot(va1, wasm_f32x4_eq(vb0123c0, vzero)), vb0123c0); in xnn_f32_gemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma() 189 …uiltin_wasm_fma_f32x4(vacc2x0123, wasm_v128_andnot(va2, wasm_f32x4_eq(vb0123c0, vzero)), vb0123c0); in xnn_f32_gemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma() [all …]
|
D | 6x8s4-relu-wasmrelaxedsimd-fma.c | 102 const v128_t vb0123c0 = wasm_v128_load(w + 0); in xnn_f32_gemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma() local 105 vacc0x0123 = __builtin_wasm_fma_f32x4(vacc0x0123, va0, vb0123c0); in xnn_f32_gemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma() 106 vacc1x0123 = __builtin_wasm_fma_f32x4(vacc1x0123, va1, vb0123c0); in xnn_f32_gemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma() 107 vacc2x0123 = __builtin_wasm_fma_f32x4(vacc2x0123, va2, vb0123c0); in xnn_f32_gemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma() 108 vacc3x0123 = __builtin_wasm_fma_f32x4(vacc3x0123, va3, vb0123c0); in xnn_f32_gemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma() 109 vacc4x0123 = __builtin_wasm_fma_f32x4(vacc4x0123, va4, vb0123c0); in xnn_f32_gemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma() 110 vacc5x0123 = __builtin_wasm_fma_f32x4(vacc5x0123, va5, vb0123c0); in xnn_f32_gemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma() 207 const v128_t vb0123c0 = wasm_v128_load(w + 0); in xnn_f32_gemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma() local 210 …uiltin_wasm_fma_f32x4(vacc0x0123, wasm_v128_andnot(va0, wasm_f32x4_eq(vb0123c0, vzero)), vb0123c0); in xnn_f32_gemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma() 211 …uiltin_wasm_fma_f32x4(vacc1x0123, wasm_v128_andnot(va1, wasm_f32x4_eq(vb0123c0, vzero)), vb0123c0); in xnn_f32_gemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma() [all …]
|
D | 6x8s4-wasmsimd.c | 102 const v128_t vb0123c0 = wasm_v128_load(w + 0); in xnn_f32_gemm_ukernel_6x8s4__wasmsimd() local 105 vacc0x0123 = wasm_f32x4_add(vacc0x0123, wasm_f32x4_mul(va0, vb0123c0)); in xnn_f32_gemm_ukernel_6x8s4__wasmsimd() 106 vacc1x0123 = wasm_f32x4_add(vacc1x0123, wasm_f32x4_mul(va1, vb0123c0)); in xnn_f32_gemm_ukernel_6x8s4__wasmsimd() 107 vacc2x0123 = wasm_f32x4_add(vacc2x0123, wasm_f32x4_mul(va2, vb0123c0)); in xnn_f32_gemm_ukernel_6x8s4__wasmsimd() 108 vacc3x0123 = wasm_f32x4_add(vacc3x0123, wasm_f32x4_mul(va3, vb0123c0)); in xnn_f32_gemm_ukernel_6x8s4__wasmsimd() 109 vacc4x0123 = wasm_f32x4_add(vacc4x0123, wasm_f32x4_mul(va4, vb0123c0)); in xnn_f32_gemm_ukernel_6x8s4__wasmsimd() 110 vacc5x0123 = wasm_f32x4_add(vacc5x0123, wasm_f32x4_mul(va5, vb0123c0)); in xnn_f32_gemm_ukernel_6x8s4__wasmsimd() 207 const v128_t vb0123c0 = wasm_v128_load(w + 0); in xnn_f32_gemm_ukernel_6x8s4__wasmsimd() local 210 …4_add(vacc0x0123, wasm_f32x4_mul(wasm_v128_andnot(va0, wasm_f32x4_eq(vb0123c0, vzero)), vb0123c0)); in xnn_f32_gemm_ukernel_6x8s4__wasmsimd() 211 …4_add(vacc1x0123, wasm_f32x4_mul(wasm_v128_andnot(va1, wasm_f32x4_eq(vb0123c0, vzero)), vb0123c0)); in xnn_f32_gemm_ukernel_6x8s4__wasmsimd() [all …]
|
D | 6x8s4-wasmrelaxedsimd-fma.c | 102 const v128_t vb0123c0 = wasm_v128_load(w + 0); in xnn_f32_gemm_ukernel_6x8s4__wasmrelaxedsimd_fma() local 105 vacc0x0123 = __builtin_wasm_fma_f32x4(vacc0x0123, va0, vb0123c0); in xnn_f32_gemm_ukernel_6x8s4__wasmrelaxedsimd_fma() 106 vacc1x0123 = __builtin_wasm_fma_f32x4(vacc1x0123, va1, vb0123c0); in xnn_f32_gemm_ukernel_6x8s4__wasmrelaxedsimd_fma() 107 vacc2x0123 = __builtin_wasm_fma_f32x4(vacc2x0123, va2, vb0123c0); in xnn_f32_gemm_ukernel_6x8s4__wasmrelaxedsimd_fma() 108 vacc3x0123 = __builtin_wasm_fma_f32x4(vacc3x0123, va3, vb0123c0); in xnn_f32_gemm_ukernel_6x8s4__wasmrelaxedsimd_fma() 109 vacc4x0123 = __builtin_wasm_fma_f32x4(vacc4x0123, va4, vb0123c0); in xnn_f32_gemm_ukernel_6x8s4__wasmrelaxedsimd_fma() 110 vacc5x0123 = __builtin_wasm_fma_f32x4(vacc5x0123, va5, vb0123c0); in xnn_f32_gemm_ukernel_6x8s4__wasmrelaxedsimd_fma() 207 const v128_t vb0123c0 = wasm_v128_load(w + 0); in xnn_f32_gemm_ukernel_6x8s4__wasmrelaxedsimd_fma() local 210 …uiltin_wasm_fma_f32x4(vacc0x0123, wasm_v128_andnot(va0, wasm_f32x4_eq(vb0123c0, vzero)), vb0123c0); in xnn_f32_gemm_ukernel_6x8s4__wasmrelaxedsimd_fma() 211 …uiltin_wasm_fma_f32x4(vacc1x0123, wasm_v128_andnot(va1, wasm_f32x4_eq(vb0123c0, vzero)), vb0123c0); in xnn_f32_gemm_ukernel_6x8s4__wasmrelaxedsimd_fma() [all …]
|
D | 6x8s4-minmax-wasmrelaxedsimd-fma.c | 104 const v128_t vb0123c0 = wasm_v128_load(w + 0); in xnn_f32_gemm_minmax_ukernel_6x8s4__wasmrelaxedsimd_fma() local 107 vacc0x0123 = __builtin_wasm_fma_f32x4(vacc0x0123, va0, vb0123c0); in xnn_f32_gemm_minmax_ukernel_6x8s4__wasmrelaxedsimd_fma() 108 vacc1x0123 = __builtin_wasm_fma_f32x4(vacc1x0123, va1, vb0123c0); in xnn_f32_gemm_minmax_ukernel_6x8s4__wasmrelaxedsimd_fma() 109 vacc2x0123 = __builtin_wasm_fma_f32x4(vacc2x0123, va2, vb0123c0); in xnn_f32_gemm_minmax_ukernel_6x8s4__wasmrelaxedsimd_fma() 110 vacc3x0123 = __builtin_wasm_fma_f32x4(vacc3x0123, va3, vb0123c0); in xnn_f32_gemm_minmax_ukernel_6x8s4__wasmrelaxedsimd_fma() 111 vacc4x0123 = __builtin_wasm_fma_f32x4(vacc4x0123, va4, vb0123c0); in xnn_f32_gemm_minmax_ukernel_6x8s4__wasmrelaxedsimd_fma() 112 vacc5x0123 = __builtin_wasm_fma_f32x4(vacc5x0123, va5, vb0123c0); in xnn_f32_gemm_minmax_ukernel_6x8s4__wasmrelaxedsimd_fma() 209 const v128_t vb0123c0 = wasm_v128_load(w + 0); in xnn_f32_gemm_minmax_ukernel_6x8s4__wasmrelaxedsimd_fma() local 212 …uiltin_wasm_fma_f32x4(vacc0x0123, wasm_v128_andnot(va0, wasm_f32x4_eq(vb0123c0, vzero)), vb0123c0); in xnn_f32_gemm_minmax_ukernel_6x8s4__wasmrelaxedsimd_fma() 213 …uiltin_wasm_fma_f32x4(vacc1x0123, wasm_v128_andnot(va1, wasm_f32x4_eq(vb0123c0, vzero)), vb0123c0); in xnn_f32_gemm_minmax_ukernel_6x8s4__wasmrelaxedsimd_fma() [all …]
|
D | 4x8s4-wasmrelaxedsimd-fma.c | 82 const v128_t vb0123c0 = wasm_v128_load(w + 0); in xnn_f32_gemm_ukernel_4x8s4__wasmrelaxedsimd_fma() local 85 vacc0x0123 = __builtin_wasm_fma_f32x4(vacc0x0123, va0, vb0123c0); in xnn_f32_gemm_ukernel_4x8s4__wasmrelaxedsimd_fma() 86 vacc1x0123 = __builtin_wasm_fma_f32x4(vacc1x0123, va1, vb0123c0); in xnn_f32_gemm_ukernel_4x8s4__wasmrelaxedsimd_fma() 87 vacc2x0123 = __builtin_wasm_fma_f32x4(vacc2x0123, va2, vb0123c0); in xnn_f32_gemm_ukernel_4x8s4__wasmrelaxedsimd_fma() 88 vacc3x0123 = __builtin_wasm_fma_f32x4(vacc3x0123, va3, vb0123c0); in xnn_f32_gemm_ukernel_4x8s4__wasmrelaxedsimd_fma() 161 const v128_t vb0123c0 = wasm_v128_load(w + 0); in xnn_f32_gemm_ukernel_4x8s4__wasmrelaxedsimd_fma() local 164 …uiltin_wasm_fma_f32x4(vacc0x0123, wasm_v128_andnot(va0, wasm_f32x4_eq(vb0123c0, vzero)), vb0123c0); in xnn_f32_gemm_ukernel_4x8s4__wasmrelaxedsimd_fma() 165 …uiltin_wasm_fma_f32x4(vacc1x0123, wasm_v128_andnot(va1, wasm_f32x4_eq(vb0123c0, vzero)), vb0123c0); in xnn_f32_gemm_ukernel_4x8s4__wasmrelaxedsimd_fma() 166 …uiltin_wasm_fma_f32x4(vacc2x0123, wasm_v128_andnot(va2, wasm_f32x4_eq(vb0123c0, vzero)), vb0123c0); in xnn_f32_gemm_ukernel_4x8s4__wasmrelaxedsimd_fma() 167 …uiltin_wasm_fma_f32x4(vacc3x0123, wasm_v128_andnot(va3, wasm_f32x4_eq(vb0123c0, vzero)), vb0123c0); in xnn_f32_gemm_ukernel_4x8s4__wasmrelaxedsimd_fma()
|
D | 3x8s4-wasmrelaxedsimd-fma.c | 72 const v128_t vb0123c0 = wasm_v128_load(w + 0); in xnn_f32_gemm_ukernel_3x8s4__wasmrelaxedsimd_fma() local 75 vacc0x0123 = __builtin_wasm_fma_f32x4(vacc0x0123, va0, vb0123c0); in xnn_f32_gemm_ukernel_3x8s4__wasmrelaxedsimd_fma() 76 vacc1x0123 = __builtin_wasm_fma_f32x4(vacc1x0123, va1, vb0123c0); in xnn_f32_gemm_ukernel_3x8s4__wasmrelaxedsimd_fma() 77 vacc2x0123 = __builtin_wasm_fma_f32x4(vacc2x0123, va2, vb0123c0); in xnn_f32_gemm_ukernel_3x8s4__wasmrelaxedsimd_fma() 138 const v128_t vb0123c0 = wasm_v128_load(w + 0); in xnn_f32_gemm_ukernel_3x8s4__wasmrelaxedsimd_fma() local 141 …uiltin_wasm_fma_f32x4(vacc0x0123, wasm_v128_andnot(va0, wasm_f32x4_eq(vb0123c0, vzero)), vb0123c0); in xnn_f32_gemm_ukernel_3x8s4__wasmrelaxedsimd_fma() 142 …uiltin_wasm_fma_f32x4(vacc1x0123, wasm_v128_andnot(va1, wasm_f32x4_eq(vb0123c0, vzero)), vb0123c0); in xnn_f32_gemm_ukernel_3x8s4__wasmrelaxedsimd_fma() 143 …uiltin_wasm_fma_f32x4(vacc2x0123, wasm_v128_andnot(va2, wasm_f32x4_eq(vb0123c0, vzero)), vb0123c0); in xnn_f32_gemm_ukernel_3x8s4__wasmrelaxedsimd_fma()
|
D | 4x8s4-wasmsimd.c | 82 const v128_t vb0123c0 = wasm_v128_load(w + 0); in xnn_f32_gemm_ukernel_4x8s4__wasmsimd() local 85 vacc0x0123 = wasm_f32x4_add(vacc0x0123, wasm_f32x4_mul(va0, vb0123c0)); in xnn_f32_gemm_ukernel_4x8s4__wasmsimd() 86 vacc1x0123 = wasm_f32x4_add(vacc1x0123, wasm_f32x4_mul(va1, vb0123c0)); in xnn_f32_gemm_ukernel_4x8s4__wasmsimd() 87 vacc2x0123 = wasm_f32x4_add(vacc2x0123, wasm_f32x4_mul(va2, vb0123c0)); in xnn_f32_gemm_ukernel_4x8s4__wasmsimd() 88 vacc3x0123 = wasm_f32x4_add(vacc3x0123, wasm_f32x4_mul(va3, vb0123c0)); in xnn_f32_gemm_ukernel_4x8s4__wasmsimd() 161 const v128_t vb0123c0 = wasm_v128_load(w + 0); in xnn_f32_gemm_ukernel_4x8s4__wasmsimd() local 164 …4_add(vacc0x0123, wasm_f32x4_mul(wasm_v128_andnot(va0, wasm_f32x4_eq(vb0123c0, vzero)), vb0123c0)); in xnn_f32_gemm_ukernel_4x8s4__wasmsimd() 165 …4_add(vacc1x0123, wasm_f32x4_mul(wasm_v128_andnot(va1, wasm_f32x4_eq(vb0123c0, vzero)), vb0123c0)); in xnn_f32_gemm_ukernel_4x8s4__wasmsimd() 166 …4_add(vacc2x0123, wasm_f32x4_mul(wasm_v128_andnot(va2, wasm_f32x4_eq(vb0123c0, vzero)), vb0123c0)); in xnn_f32_gemm_ukernel_4x8s4__wasmsimd() 167 …4_add(vacc3x0123, wasm_f32x4_mul(wasm_v128_andnot(va3, wasm_f32x4_eq(vb0123c0, vzero)), vb0123c0)); in xnn_f32_gemm_ukernel_4x8s4__wasmsimd()
|
D | 4x8s4-relu-wasmrelaxedsimd-fma.c | 82 const v128_t vb0123c0 = wasm_v128_load(w + 0); in xnn_f32_gemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma() local 85 vacc0x0123 = __builtin_wasm_fma_f32x4(vacc0x0123, va0, vb0123c0); in xnn_f32_gemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma() 86 vacc1x0123 = __builtin_wasm_fma_f32x4(vacc1x0123, va1, vb0123c0); in xnn_f32_gemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma() 87 vacc2x0123 = __builtin_wasm_fma_f32x4(vacc2x0123, va2, vb0123c0); in xnn_f32_gemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma() 88 vacc3x0123 = __builtin_wasm_fma_f32x4(vacc3x0123, va3, vb0123c0); in xnn_f32_gemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma() 161 const v128_t vb0123c0 = wasm_v128_load(w + 0); in xnn_f32_gemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma() local 164 …uiltin_wasm_fma_f32x4(vacc0x0123, wasm_v128_andnot(va0, wasm_f32x4_eq(vb0123c0, vzero)), vb0123c0); in xnn_f32_gemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma() 165 …uiltin_wasm_fma_f32x4(vacc1x0123, wasm_v128_andnot(va1, wasm_f32x4_eq(vb0123c0, vzero)), vb0123c0); in xnn_f32_gemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma() 166 …uiltin_wasm_fma_f32x4(vacc2x0123, wasm_v128_andnot(va2, wasm_f32x4_eq(vb0123c0, vzero)), vb0123c0); in xnn_f32_gemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma() 167 …uiltin_wasm_fma_f32x4(vacc3x0123, wasm_v128_andnot(va3, wasm_f32x4_eq(vb0123c0, vzero)), vb0123c0); in xnn_f32_gemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma()
|
D | 5x8s4-minmax-wasmrelaxedsimd.c | 94 const v128_t vb0123c0 = wasm_v128_load(w + 0); in xnn_f32_gemm_minmax_ukernel_5x8s4__wasmrelaxedsimd() local 97 vacc0x0123 = wasm_f32x4_add(vacc0x0123, wasm_f32x4_mul(va0, vb0123c0)); in xnn_f32_gemm_minmax_ukernel_5x8s4__wasmrelaxedsimd() 98 vacc1x0123 = wasm_f32x4_add(vacc1x0123, wasm_f32x4_mul(va1, vb0123c0)); in xnn_f32_gemm_minmax_ukernel_5x8s4__wasmrelaxedsimd() 99 vacc2x0123 = wasm_f32x4_add(vacc2x0123, wasm_f32x4_mul(va2, vb0123c0)); in xnn_f32_gemm_minmax_ukernel_5x8s4__wasmrelaxedsimd() 100 vacc3x0123 = wasm_f32x4_add(vacc3x0123, wasm_f32x4_mul(va3, vb0123c0)); in xnn_f32_gemm_minmax_ukernel_5x8s4__wasmrelaxedsimd() 101 vacc4x0123 = wasm_f32x4_add(vacc4x0123, wasm_f32x4_mul(va4, vb0123c0)); in xnn_f32_gemm_minmax_ukernel_5x8s4__wasmrelaxedsimd() 186 const v128_t vb0123c0 = wasm_v128_load(w + 0); in xnn_f32_gemm_minmax_ukernel_5x8s4__wasmrelaxedsimd() local 189 …4_add(vacc0x0123, wasm_f32x4_mul(wasm_v128_andnot(va0, wasm_f32x4_eq(vb0123c0, vzero)), vb0123c0)); in xnn_f32_gemm_minmax_ukernel_5x8s4__wasmrelaxedsimd() 190 …4_add(vacc1x0123, wasm_f32x4_mul(wasm_v128_andnot(va1, wasm_f32x4_eq(vb0123c0, vzero)), vb0123c0)); in xnn_f32_gemm_minmax_ukernel_5x8s4__wasmrelaxedsimd() 191 …4_add(vacc2x0123, wasm_f32x4_mul(wasm_v128_andnot(va2, wasm_f32x4_eq(vb0123c0, vzero)), vb0123c0)); in xnn_f32_gemm_minmax_ukernel_5x8s4__wasmrelaxedsimd() [all …]
|
D | 5x8s4-minmax-wasmsimd-arm.c | 94 const v128_t vb0123c0 = wasm_v128_load(w + 0); in xnn_f32_gemm_minmax_ukernel_5x8s4__wasmsimd_arm() local 97 vacc0x0123 = wasm_f32x4_add(vacc0x0123, wasm_f32x4_mul(va0, vb0123c0)); in xnn_f32_gemm_minmax_ukernel_5x8s4__wasmsimd_arm() 98 vacc1x0123 = wasm_f32x4_add(vacc1x0123, wasm_f32x4_mul(va1, vb0123c0)); in xnn_f32_gemm_minmax_ukernel_5x8s4__wasmsimd_arm() 99 vacc2x0123 = wasm_f32x4_add(vacc2x0123, wasm_f32x4_mul(va2, vb0123c0)); in xnn_f32_gemm_minmax_ukernel_5x8s4__wasmsimd_arm() 100 vacc3x0123 = wasm_f32x4_add(vacc3x0123, wasm_f32x4_mul(va3, vb0123c0)); in xnn_f32_gemm_minmax_ukernel_5x8s4__wasmsimd_arm() 101 vacc4x0123 = wasm_f32x4_add(vacc4x0123, wasm_f32x4_mul(va4, vb0123c0)); in xnn_f32_gemm_minmax_ukernel_5x8s4__wasmsimd_arm() 186 const v128_t vb0123c0 = wasm_v128_load(w + 0); in xnn_f32_gemm_minmax_ukernel_5x8s4__wasmsimd_arm() local 189 …4_add(vacc0x0123, wasm_f32x4_mul(wasm_v128_andnot(va0, wasm_f32x4_eq(vb0123c0, vzero)), vb0123c0)); in xnn_f32_gemm_minmax_ukernel_5x8s4__wasmsimd_arm() 190 …4_add(vacc1x0123, wasm_f32x4_mul(wasm_v128_andnot(va1, wasm_f32x4_eq(vb0123c0, vzero)), vb0123c0)); in xnn_f32_gemm_minmax_ukernel_5x8s4__wasmsimd_arm() 191 …4_add(vacc2x0123, wasm_f32x4_mul(wasm_v128_andnot(va2, wasm_f32x4_eq(vb0123c0, vzero)), vb0123c0)); in xnn_f32_gemm_minmax_ukernel_5x8s4__wasmsimd_arm() [all …]
|
D | 5x8s4-relu-wasmsimd.c | 92 const v128_t vb0123c0 = wasm_v128_load(w + 0); in xnn_f32_gemm_relu_ukernel_5x8s4__wasmsimd() local 95 vacc0x0123 = wasm_f32x4_add(vacc0x0123, wasm_f32x4_mul(va0, vb0123c0)); in xnn_f32_gemm_relu_ukernel_5x8s4__wasmsimd() 96 vacc1x0123 = wasm_f32x4_add(vacc1x0123, wasm_f32x4_mul(va1, vb0123c0)); in xnn_f32_gemm_relu_ukernel_5x8s4__wasmsimd() 97 vacc2x0123 = wasm_f32x4_add(vacc2x0123, wasm_f32x4_mul(va2, vb0123c0)); in xnn_f32_gemm_relu_ukernel_5x8s4__wasmsimd() 98 vacc3x0123 = wasm_f32x4_add(vacc3x0123, wasm_f32x4_mul(va3, vb0123c0)); in xnn_f32_gemm_relu_ukernel_5x8s4__wasmsimd() 99 vacc4x0123 = wasm_f32x4_add(vacc4x0123, wasm_f32x4_mul(va4, vb0123c0)); in xnn_f32_gemm_relu_ukernel_5x8s4__wasmsimd() 184 const v128_t vb0123c0 = wasm_v128_load(w + 0); in xnn_f32_gemm_relu_ukernel_5x8s4__wasmsimd() local 187 …4_add(vacc0x0123, wasm_f32x4_mul(wasm_v128_andnot(va0, wasm_f32x4_eq(vb0123c0, vzero)), vb0123c0)); in xnn_f32_gemm_relu_ukernel_5x8s4__wasmsimd() 188 …4_add(vacc1x0123, wasm_f32x4_mul(wasm_v128_andnot(va1, wasm_f32x4_eq(vb0123c0, vzero)), vb0123c0)); in xnn_f32_gemm_relu_ukernel_5x8s4__wasmsimd() 189 …4_add(vacc2x0123, wasm_f32x4_mul(wasm_v128_andnot(va2, wasm_f32x4_eq(vb0123c0, vzero)), vb0123c0)); in xnn_f32_gemm_relu_ukernel_5x8s4__wasmsimd() [all …]
|
/external/XNNPACK/src/f32-gemm/gen-inc/ |
D | 5x8s4inc-minmax-wasmsimd-x86.c | 96 const v128_t vb0123c0 = wasm_v128_load(w + 0); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_x86() local 99 vacc0x0123 = wasm_f32x4_add(vacc0x0123, wasm_f32x4_mul(va0, vb0123c0)); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_x86() 100 vacc1x0123 = wasm_f32x4_add(vacc1x0123, wasm_f32x4_mul(va1, vb0123c0)); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_x86() 101 vacc2x0123 = wasm_f32x4_add(vacc2x0123, wasm_f32x4_mul(va2, vb0123c0)); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_x86() 102 vacc3x0123 = wasm_f32x4_add(vacc3x0123, wasm_f32x4_mul(va3, vb0123c0)); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_x86() 103 vacc4x0123 = wasm_f32x4_add(vacc4x0123, wasm_f32x4_mul(va4, vb0123c0)); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_x86() 188 const v128_t vb0123c0 = wasm_v128_load(w + 0); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_x86() local 191 …4_add(vacc0x0123, wasm_f32x4_mul(wasm_v128_andnot(va0, wasm_f32x4_eq(vb0123c0, vzero)), vb0123c0)); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_x86() 192 …4_add(vacc1x0123, wasm_f32x4_mul(wasm_v128_andnot(va1, wasm_f32x4_eq(vb0123c0, vzero)), vb0123c0)); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_x86() 193 …4_add(vacc2x0123, wasm_f32x4_mul(wasm_v128_andnot(va2, wasm_f32x4_eq(vb0123c0, vzero)), vb0123c0)); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_x86() [all …]
|
D | 5x8s4inc-minmax-wasmrelaxedsimd-fma.c | 96 const v128_t vb0123c0 = wasm_v128_load(w + 0); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma() local 99 vacc0x0123 = __builtin_wasm_fma_f32x4(vacc0x0123, va0, vb0123c0); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma() 100 vacc1x0123 = __builtin_wasm_fma_f32x4(vacc1x0123, va1, vb0123c0); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma() 101 vacc2x0123 = __builtin_wasm_fma_f32x4(vacc2x0123, va2, vb0123c0); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma() 102 vacc3x0123 = __builtin_wasm_fma_f32x4(vacc3x0123, va3, vb0123c0); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma() 103 vacc4x0123 = __builtin_wasm_fma_f32x4(vacc4x0123, va4, vb0123c0); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma() 188 const v128_t vb0123c0 = wasm_v128_load(w + 0); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma() local 191 …uiltin_wasm_fma_f32x4(vacc0x0123, wasm_v128_andnot(va0, wasm_f32x4_eq(vb0123c0, vzero)), vb0123c0); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma() 192 …uiltin_wasm_fma_f32x4(vacc1x0123, wasm_v128_andnot(va1, wasm_f32x4_eq(vb0123c0, vzero)), vb0123c0); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma() 193 …uiltin_wasm_fma_f32x4(vacc2x0123, wasm_v128_andnot(va2, wasm_f32x4_eq(vb0123c0, vzero)), vb0123c0); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma() [all …]
|
D | 5x8s4inc-minmax-wasmsimd-arm.c | 96 const v128_t vb0123c0 = wasm_v128_load(w + 0); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_arm() local 99 vacc0x0123 = wasm_f32x4_add(vacc0x0123, wasm_f32x4_mul(va0, vb0123c0)); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_arm() 100 vacc1x0123 = wasm_f32x4_add(vacc1x0123, wasm_f32x4_mul(va1, vb0123c0)); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_arm() 101 vacc2x0123 = wasm_f32x4_add(vacc2x0123, wasm_f32x4_mul(va2, vb0123c0)); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_arm() 102 vacc3x0123 = wasm_f32x4_add(vacc3x0123, wasm_f32x4_mul(va3, vb0123c0)); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_arm() 103 vacc4x0123 = wasm_f32x4_add(vacc4x0123, wasm_f32x4_mul(va4, vb0123c0)); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_arm() 188 const v128_t vb0123c0 = wasm_v128_load(w + 0); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_arm() local 191 …4_add(vacc0x0123, wasm_f32x4_mul(wasm_v128_andnot(va0, wasm_f32x4_eq(vb0123c0, vzero)), vb0123c0)); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_arm() 192 …4_add(vacc1x0123, wasm_f32x4_mul(wasm_v128_andnot(va1, wasm_f32x4_eq(vb0123c0, vzero)), vb0123c0)); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_arm() 193 …4_add(vacc2x0123, wasm_f32x4_mul(wasm_v128_andnot(va2, wasm_f32x4_eq(vb0123c0, vzero)), vb0123c0)); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_arm() [all …]
|