/external/XNNPACK/src/f32-igemm/gen/ |
D | 5x8s4-wasmrelaxedsimd-fma.c | 177 const v128_t vb0123c3 = wasm_v128_load(w + 24); in xnn_f32_igemm_ukernel_5x8s4__wasmrelaxedsimd_fma() local 180 vacc0x0123 = __builtin_wasm_fma_f32x4(vacc0x0123, va0, vb0123c3); in xnn_f32_igemm_ukernel_5x8s4__wasmrelaxedsimd_fma() 181 vacc1x0123 = __builtin_wasm_fma_f32x4(vacc1x0123, va1, vb0123c3); in xnn_f32_igemm_ukernel_5x8s4__wasmrelaxedsimd_fma() 182 vacc2x0123 = __builtin_wasm_fma_f32x4(vacc2x0123, va2, vb0123c3); in xnn_f32_igemm_ukernel_5x8s4__wasmrelaxedsimd_fma() 183 vacc3x0123 = __builtin_wasm_fma_f32x4(vacc3x0123, va3, vb0123c3); in xnn_f32_igemm_ukernel_5x8s4__wasmrelaxedsimd_fma() 184 vacc4x0123 = __builtin_wasm_fma_f32x4(vacc4x0123, va4, vb0123c3); in xnn_f32_igemm_ukernel_5x8s4__wasmrelaxedsimd_fma() 269 const v128_t vb0123c3 = wasm_v128_load(w + 24); in xnn_f32_igemm_ukernel_5x8s4__wasmrelaxedsimd_fma() local 272 …uiltin_wasm_fma_f32x4(vacc0x0123, wasm_v128_andnot(va0, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3); in xnn_f32_igemm_ukernel_5x8s4__wasmrelaxedsimd_fma() 273 …uiltin_wasm_fma_f32x4(vacc1x0123, wasm_v128_andnot(va1, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3); in xnn_f32_igemm_ukernel_5x8s4__wasmrelaxedsimd_fma() 274 …uiltin_wasm_fma_f32x4(vacc2x0123, wasm_v128_andnot(va2, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3); in xnn_f32_igemm_ukernel_5x8s4__wasmrelaxedsimd_fma() [all …]
|
D | 6x8s4-wasmrelaxedsimd-fma.c | 199 const v128_t vb0123c3 = wasm_v128_load(w + 24); in xnn_f32_igemm_ukernel_6x8s4__wasmrelaxedsimd_fma() local 202 vacc0x0123 = __builtin_wasm_fma_f32x4(vacc0x0123, va0, vb0123c3); in xnn_f32_igemm_ukernel_6x8s4__wasmrelaxedsimd_fma() 203 vacc1x0123 = __builtin_wasm_fma_f32x4(vacc1x0123, va1, vb0123c3); in xnn_f32_igemm_ukernel_6x8s4__wasmrelaxedsimd_fma() 204 vacc2x0123 = __builtin_wasm_fma_f32x4(vacc2x0123, va2, vb0123c3); in xnn_f32_igemm_ukernel_6x8s4__wasmrelaxedsimd_fma() 205 vacc3x0123 = __builtin_wasm_fma_f32x4(vacc3x0123, va3, vb0123c3); in xnn_f32_igemm_ukernel_6x8s4__wasmrelaxedsimd_fma() 206 vacc4x0123 = __builtin_wasm_fma_f32x4(vacc4x0123, va4, vb0123c3); in xnn_f32_igemm_ukernel_6x8s4__wasmrelaxedsimd_fma() 207 vacc5x0123 = __builtin_wasm_fma_f32x4(vacc5x0123, va5, vb0123c3); in xnn_f32_igemm_ukernel_6x8s4__wasmrelaxedsimd_fma() 304 const v128_t vb0123c3 = wasm_v128_load(w + 24); in xnn_f32_igemm_ukernel_6x8s4__wasmrelaxedsimd_fma() local 307 …uiltin_wasm_fma_f32x4(vacc0x0123, wasm_v128_andnot(va0, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3); in xnn_f32_igemm_ukernel_6x8s4__wasmrelaxedsimd_fma() 308 …uiltin_wasm_fma_f32x4(vacc1x0123, wasm_v128_andnot(va1, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3); in xnn_f32_igemm_ukernel_6x8s4__wasmrelaxedsimd_fma() [all …]
|
D | 6x8s4-relu-wasmrelaxedsimd-fma.c | 199 const v128_t vb0123c3 = wasm_v128_load(w + 24); in xnn_f32_igemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma() local 202 vacc0x0123 = __builtin_wasm_fma_f32x4(vacc0x0123, va0, vb0123c3); in xnn_f32_igemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma() 203 vacc1x0123 = __builtin_wasm_fma_f32x4(vacc1x0123, va1, vb0123c3); in xnn_f32_igemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma() 204 vacc2x0123 = __builtin_wasm_fma_f32x4(vacc2x0123, va2, vb0123c3); in xnn_f32_igemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma() 205 vacc3x0123 = __builtin_wasm_fma_f32x4(vacc3x0123, va3, vb0123c3); in xnn_f32_igemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma() 206 vacc4x0123 = __builtin_wasm_fma_f32x4(vacc4x0123, va4, vb0123c3); in xnn_f32_igemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma() 207 vacc5x0123 = __builtin_wasm_fma_f32x4(vacc5x0123, va5, vb0123c3); in xnn_f32_igemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma() 304 const v128_t vb0123c3 = wasm_v128_load(w + 24); in xnn_f32_igemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma() local 307 …uiltin_wasm_fma_f32x4(vacc0x0123, wasm_v128_andnot(va0, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3); in xnn_f32_igemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma() 308 …uiltin_wasm_fma_f32x4(vacc1x0123, wasm_v128_andnot(va1, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3); in xnn_f32_igemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma() [all …]
|
D | 4x8s4-wasmrelaxedsimd-fma.c | 155 const v128_t vb0123c3 = wasm_v128_load(w + 24); in xnn_f32_igemm_ukernel_4x8s4__wasmrelaxedsimd_fma() local 158 vacc0x0123 = __builtin_wasm_fma_f32x4(vacc0x0123, va0, vb0123c3); in xnn_f32_igemm_ukernel_4x8s4__wasmrelaxedsimd_fma() 159 vacc1x0123 = __builtin_wasm_fma_f32x4(vacc1x0123, va1, vb0123c3); in xnn_f32_igemm_ukernel_4x8s4__wasmrelaxedsimd_fma() 160 vacc2x0123 = __builtin_wasm_fma_f32x4(vacc2x0123, va2, vb0123c3); in xnn_f32_igemm_ukernel_4x8s4__wasmrelaxedsimd_fma() 161 vacc3x0123 = __builtin_wasm_fma_f32x4(vacc3x0123, va3, vb0123c3); in xnn_f32_igemm_ukernel_4x8s4__wasmrelaxedsimd_fma() 234 const v128_t vb0123c3 = wasm_v128_load(w + 24); in xnn_f32_igemm_ukernel_4x8s4__wasmrelaxedsimd_fma() local 237 …uiltin_wasm_fma_f32x4(vacc0x0123, wasm_v128_andnot(va0, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3); in xnn_f32_igemm_ukernel_4x8s4__wasmrelaxedsimd_fma() 238 …uiltin_wasm_fma_f32x4(vacc1x0123, wasm_v128_andnot(va1, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3); in xnn_f32_igemm_ukernel_4x8s4__wasmrelaxedsimd_fma() 239 …uiltin_wasm_fma_f32x4(vacc2x0123, wasm_v128_andnot(va2, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3); in xnn_f32_igemm_ukernel_4x8s4__wasmrelaxedsimd_fma() 240 …uiltin_wasm_fma_f32x4(vacc3x0123, wasm_v128_andnot(va3, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3); in xnn_f32_igemm_ukernel_4x8s4__wasmrelaxedsimd_fma()
|
D | 4x8s4-relu-wasmrelaxedsimd-fma.c | 155 const v128_t vb0123c3 = wasm_v128_load(w + 24); in xnn_f32_igemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma() local 158 vacc0x0123 = __builtin_wasm_fma_f32x4(vacc0x0123, va0, vb0123c3); in xnn_f32_igemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma() 159 vacc1x0123 = __builtin_wasm_fma_f32x4(vacc1x0123, va1, vb0123c3); in xnn_f32_igemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma() 160 vacc2x0123 = __builtin_wasm_fma_f32x4(vacc2x0123, va2, vb0123c3); in xnn_f32_igemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma() 161 vacc3x0123 = __builtin_wasm_fma_f32x4(vacc3x0123, va3, vb0123c3); in xnn_f32_igemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma() 234 const v128_t vb0123c3 = wasm_v128_load(w + 24); in xnn_f32_igemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma() local 237 …uiltin_wasm_fma_f32x4(vacc0x0123, wasm_v128_andnot(va0, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3); in xnn_f32_igemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma() 238 …uiltin_wasm_fma_f32x4(vacc1x0123, wasm_v128_andnot(va1, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3); in xnn_f32_igemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma() 239 …uiltin_wasm_fma_f32x4(vacc2x0123, wasm_v128_andnot(va2, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3); in xnn_f32_igemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma() 240 …uiltin_wasm_fma_f32x4(vacc3x0123, wasm_v128_andnot(va3, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3); in xnn_f32_igemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma()
|
D | 5x8s4-minmax-wasmrelaxedsimd-fma.c | 179 const v128_t vb0123c3 = wasm_v128_load(w + 24); in xnn_f32_igemm_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma() local 182 vacc0x0123 = __builtin_wasm_fma_f32x4(vacc0x0123, va0, vb0123c3); in xnn_f32_igemm_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma() 183 vacc1x0123 = __builtin_wasm_fma_f32x4(vacc1x0123, va1, vb0123c3); in xnn_f32_igemm_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma() 184 vacc2x0123 = __builtin_wasm_fma_f32x4(vacc2x0123, va2, vb0123c3); in xnn_f32_igemm_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma() 185 vacc3x0123 = __builtin_wasm_fma_f32x4(vacc3x0123, va3, vb0123c3); in xnn_f32_igemm_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma() 186 vacc4x0123 = __builtin_wasm_fma_f32x4(vacc4x0123, va4, vb0123c3); in xnn_f32_igemm_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma() 271 const v128_t vb0123c3 = wasm_v128_load(w + 24); in xnn_f32_igemm_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma() local 274 …uiltin_wasm_fma_f32x4(vacc0x0123, wasm_v128_andnot(va0, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3); in xnn_f32_igemm_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma() 275 …uiltin_wasm_fma_f32x4(vacc1x0123, wasm_v128_andnot(va1, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3); in xnn_f32_igemm_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma() 276 …uiltin_wasm_fma_f32x4(vacc2x0123, wasm_v128_andnot(va2, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3); in xnn_f32_igemm_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma() [all …]
|
D | 5x8s4-relu-wasmsimd.c | 177 const v128_t vb0123c3 = wasm_v128_load(w + 24); in xnn_f32_igemm_relu_ukernel_5x8s4__wasmsimd() local 180 vacc0x0123 = wasm_f32x4_add(vacc0x0123, wasm_f32x4_mul(va0, vb0123c3)); in xnn_f32_igemm_relu_ukernel_5x8s4__wasmsimd() 181 vacc1x0123 = wasm_f32x4_add(vacc1x0123, wasm_f32x4_mul(va1, vb0123c3)); in xnn_f32_igemm_relu_ukernel_5x8s4__wasmsimd() 182 vacc2x0123 = wasm_f32x4_add(vacc2x0123, wasm_f32x4_mul(va2, vb0123c3)); in xnn_f32_igemm_relu_ukernel_5x8s4__wasmsimd() 183 vacc3x0123 = wasm_f32x4_add(vacc3x0123, wasm_f32x4_mul(va3, vb0123c3)); in xnn_f32_igemm_relu_ukernel_5x8s4__wasmsimd() 184 vacc4x0123 = wasm_f32x4_add(vacc4x0123, wasm_f32x4_mul(va4, vb0123c3)); in xnn_f32_igemm_relu_ukernel_5x8s4__wasmsimd() 269 const v128_t vb0123c3 = wasm_v128_load(w + 24); in xnn_f32_igemm_relu_ukernel_5x8s4__wasmsimd() local 272 …4_add(vacc0x0123, wasm_f32x4_mul(wasm_v128_andnot(va0, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3)); in xnn_f32_igemm_relu_ukernel_5x8s4__wasmsimd() 273 …4_add(vacc1x0123, wasm_f32x4_mul(wasm_v128_andnot(va1, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3)); in xnn_f32_igemm_relu_ukernel_5x8s4__wasmsimd() 274 …4_add(vacc2x0123, wasm_f32x4_mul(wasm_v128_andnot(va2, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3)); in xnn_f32_igemm_relu_ukernel_5x8s4__wasmsimd() [all …]
|
D | 5x8s4-wasmsimd.c | 177 const v128_t vb0123c3 = wasm_v128_load(w + 24); in xnn_f32_igemm_ukernel_5x8s4__wasmsimd() local 180 vacc0x0123 = wasm_f32x4_add(vacc0x0123, wasm_f32x4_mul(va0, vb0123c3)); in xnn_f32_igemm_ukernel_5x8s4__wasmsimd() 181 vacc1x0123 = wasm_f32x4_add(vacc1x0123, wasm_f32x4_mul(va1, vb0123c3)); in xnn_f32_igemm_ukernel_5x8s4__wasmsimd() 182 vacc2x0123 = wasm_f32x4_add(vacc2x0123, wasm_f32x4_mul(va2, vb0123c3)); in xnn_f32_igemm_ukernel_5x8s4__wasmsimd() 183 vacc3x0123 = wasm_f32x4_add(vacc3x0123, wasm_f32x4_mul(va3, vb0123c3)); in xnn_f32_igemm_ukernel_5x8s4__wasmsimd() 184 vacc4x0123 = wasm_f32x4_add(vacc4x0123, wasm_f32x4_mul(va4, vb0123c3)); in xnn_f32_igemm_ukernel_5x8s4__wasmsimd() 269 const v128_t vb0123c3 = wasm_v128_load(w + 24); in xnn_f32_igemm_ukernel_5x8s4__wasmsimd() local 272 …4_add(vacc0x0123, wasm_f32x4_mul(wasm_v128_andnot(va0, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3)); in xnn_f32_igemm_ukernel_5x8s4__wasmsimd() 273 …4_add(vacc1x0123, wasm_f32x4_mul(wasm_v128_andnot(va1, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3)); in xnn_f32_igemm_ukernel_5x8s4__wasmsimd() 274 …4_add(vacc2x0123, wasm_f32x4_mul(wasm_v128_andnot(va2, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3)); in xnn_f32_igemm_ukernel_5x8s4__wasmsimd() [all …]
|
D | 5x8s4-relu-wasmrelaxedsimd-fma.c | 177 const v128_t vb0123c3 = wasm_v128_load(w + 24); in xnn_f32_igemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma() local 180 vacc0x0123 = __builtin_wasm_fma_f32x4(vacc0x0123, va0, vb0123c3); in xnn_f32_igemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma() 181 vacc1x0123 = __builtin_wasm_fma_f32x4(vacc1x0123, va1, vb0123c3); in xnn_f32_igemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma() 182 vacc2x0123 = __builtin_wasm_fma_f32x4(vacc2x0123, va2, vb0123c3); in xnn_f32_igemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma() 183 vacc3x0123 = __builtin_wasm_fma_f32x4(vacc3x0123, va3, vb0123c3); in xnn_f32_igemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma() 184 vacc4x0123 = __builtin_wasm_fma_f32x4(vacc4x0123, va4, vb0123c3); in xnn_f32_igemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma() 269 const v128_t vb0123c3 = wasm_v128_load(w + 24); in xnn_f32_igemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma() local 272 …uiltin_wasm_fma_f32x4(vacc0x0123, wasm_v128_andnot(va0, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3); in xnn_f32_igemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma() 273 …uiltin_wasm_fma_f32x4(vacc1x0123, wasm_v128_andnot(va1, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3); in xnn_f32_igemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma() 274 …uiltin_wasm_fma_f32x4(vacc2x0123, wasm_v128_andnot(va2, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3); in xnn_f32_igemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma() [all …]
|
/external/XNNPACK/src/f32-gemm/gen/ |
D | 5x8s4-wasmrelaxedsimd-fma.c | 152 const v128_t vb0123c3 = wasm_v128_load(w + 24); in xnn_f32_gemm_ukernel_5x8s4__wasmrelaxedsimd_fma() local 155 vacc0x0123 = __builtin_wasm_fma_f32x4(vacc0x0123, va0, vb0123c3); in xnn_f32_gemm_ukernel_5x8s4__wasmrelaxedsimd_fma() 156 vacc1x0123 = __builtin_wasm_fma_f32x4(vacc1x0123, va1, vb0123c3); in xnn_f32_gemm_ukernel_5x8s4__wasmrelaxedsimd_fma() 157 vacc2x0123 = __builtin_wasm_fma_f32x4(vacc2x0123, va2, vb0123c3); in xnn_f32_gemm_ukernel_5x8s4__wasmrelaxedsimd_fma() 158 vacc3x0123 = __builtin_wasm_fma_f32x4(vacc3x0123, va3, vb0123c3); in xnn_f32_gemm_ukernel_5x8s4__wasmrelaxedsimd_fma() 159 vacc4x0123 = __builtin_wasm_fma_f32x4(vacc4x0123, va4, vb0123c3); in xnn_f32_gemm_ukernel_5x8s4__wasmrelaxedsimd_fma() 244 const v128_t vb0123c3 = wasm_v128_load(w + 24); in xnn_f32_gemm_ukernel_5x8s4__wasmrelaxedsimd_fma() local 247 …uiltin_wasm_fma_f32x4(vacc0x0123, wasm_v128_andnot(va0, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3); in xnn_f32_gemm_ukernel_5x8s4__wasmrelaxedsimd_fma() 248 …uiltin_wasm_fma_f32x4(vacc1x0123, wasm_v128_andnot(va1, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3); in xnn_f32_gemm_ukernel_5x8s4__wasmrelaxedsimd_fma() 249 …uiltin_wasm_fma_f32x4(vacc2x0123, wasm_v128_andnot(va2, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3); in xnn_f32_gemm_ukernel_5x8s4__wasmrelaxedsimd_fma() [all …]
|
D | 5x8s4-relu-wasmrelaxedsimd-fma.c | 152 const v128_t vb0123c3 = wasm_v128_load(w + 24); in xnn_f32_gemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma() local 155 vacc0x0123 = __builtin_wasm_fma_f32x4(vacc0x0123, va0, vb0123c3); in xnn_f32_gemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma() 156 vacc1x0123 = __builtin_wasm_fma_f32x4(vacc1x0123, va1, vb0123c3); in xnn_f32_gemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma() 157 vacc2x0123 = __builtin_wasm_fma_f32x4(vacc2x0123, va2, vb0123c3); in xnn_f32_gemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma() 158 vacc3x0123 = __builtin_wasm_fma_f32x4(vacc3x0123, va3, vb0123c3); in xnn_f32_gemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma() 159 vacc4x0123 = __builtin_wasm_fma_f32x4(vacc4x0123, va4, vb0123c3); in xnn_f32_gemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma() 244 const v128_t vb0123c3 = wasm_v128_load(w + 24); in xnn_f32_gemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma() local 247 …uiltin_wasm_fma_f32x4(vacc0x0123, wasm_v128_andnot(va0, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3); in xnn_f32_gemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma() 248 …uiltin_wasm_fma_f32x4(vacc1x0123, wasm_v128_andnot(va1, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3); in xnn_f32_gemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma() 249 …uiltin_wasm_fma_f32x4(vacc2x0123, wasm_v128_andnot(va2, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3); in xnn_f32_gemm_relu_ukernel_5x8s4__wasmrelaxedsimd_fma() [all …]
|
D | 6x8s4-relu-wasmrelaxedsimd-fma.c | 171 const v128_t vb0123c3 = wasm_v128_load(w + 24); in xnn_f32_gemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma() local 174 vacc0x0123 = __builtin_wasm_fma_f32x4(vacc0x0123, va0, vb0123c3); in xnn_f32_gemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma() 175 vacc1x0123 = __builtin_wasm_fma_f32x4(vacc1x0123, va1, vb0123c3); in xnn_f32_gemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma() 176 vacc2x0123 = __builtin_wasm_fma_f32x4(vacc2x0123, va2, vb0123c3); in xnn_f32_gemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma() 177 vacc3x0123 = __builtin_wasm_fma_f32x4(vacc3x0123, va3, vb0123c3); in xnn_f32_gemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma() 178 vacc4x0123 = __builtin_wasm_fma_f32x4(vacc4x0123, va4, vb0123c3); in xnn_f32_gemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma() 179 vacc5x0123 = __builtin_wasm_fma_f32x4(vacc5x0123, va5, vb0123c3); in xnn_f32_gemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma() 276 const v128_t vb0123c3 = wasm_v128_load(w + 24); in xnn_f32_gemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma() local 279 …uiltin_wasm_fma_f32x4(vacc0x0123, wasm_v128_andnot(va0, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3); in xnn_f32_gemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma() 280 …uiltin_wasm_fma_f32x4(vacc1x0123, wasm_v128_andnot(va1, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3); in xnn_f32_gemm_relu_ukernel_6x8s4__wasmrelaxedsimd_fma() [all …]
|
D | 6x8s4-wasmsimd.c | 171 const v128_t vb0123c3 = wasm_v128_load(w + 24); in xnn_f32_gemm_ukernel_6x8s4__wasmsimd() local 174 vacc0x0123 = wasm_f32x4_add(vacc0x0123, wasm_f32x4_mul(va0, vb0123c3)); in xnn_f32_gemm_ukernel_6x8s4__wasmsimd() 175 vacc1x0123 = wasm_f32x4_add(vacc1x0123, wasm_f32x4_mul(va1, vb0123c3)); in xnn_f32_gemm_ukernel_6x8s4__wasmsimd() 176 vacc2x0123 = wasm_f32x4_add(vacc2x0123, wasm_f32x4_mul(va2, vb0123c3)); in xnn_f32_gemm_ukernel_6x8s4__wasmsimd() 177 vacc3x0123 = wasm_f32x4_add(vacc3x0123, wasm_f32x4_mul(va3, vb0123c3)); in xnn_f32_gemm_ukernel_6x8s4__wasmsimd() 178 vacc4x0123 = wasm_f32x4_add(vacc4x0123, wasm_f32x4_mul(va4, vb0123c3)); in xnn_f32_gemm_ukernel_6x8s4__wasmsimd() 179 vacc5x0123 = wasm_f32x4_add(vacc5x0123, wasm_f32x4_mul(va5, vb0123c3)); in xnn_f32_gemm_ukernel_6x8s4__wasmsimd() 276 const v128_t vb0123c3 = wasm_v128_load(w + 24); in xnn_f32_gemm_ukernel_6x8s4__wasmsimd() local 279 …4_add(vacc0x0123, wasm_f32x4_mul(wasm_v128_andnot(va0, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3)); in xnn_f32_gemm_ukernel_6x8s4__wasmsimd() 280 …4_add(vacc1x0123, wasm_f32x4_mul(wasm_v128_andnot(va1, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3)); in xnn_f32_gemm_ukernel_6x8s4__wasmsimd() [all …]
|
D | 6x8s4-wasmrelaxedsimd-fma.c | 171 const v128_t vb0123c3 = wasm_v128_load(w + 24); in xnn_f32_gemm_ukernel_6x8s4__wasmrelaxedsimd_fma() local 174 vacc0x0123 = __builtin_wasm_fma_f32x4(vacc0x0123, va0, vb0123c3); in xnn_f32_gemm_ukernel_6x8s4__wasmrelaxedsimd_fma() 175 vacc1x0123 = __builtin_wasm_fma_f32x4(vacc1x0123, va1, vb0123c3); in xnn_f32_gemm_ukernel_6x8s4__wasmrelaxedsimd_fma() 176 vacc2x0123 = __builtin_wasm_fma_f32x4(vacc2x0123, va2, vb0123c3); in xnn_f32_gemm_ukernel_6x8s4__wasmrelaxedsimd_fma() 177 vacc3x0123 = __builtin_wasm_fma_f32x4(vacc3x0123, va3, vb0123c3); in xnn_f32_gemm_ukernel_6x8s4__wasmrelaxedsimd_fma() 178 vacc4x0123 = __builtin_wasm_fma_f32x4(vacc4x0123, va4, vb0123c3); in xnn_f32_gemm_ukernel_6x8s4__wasmrelaxedsimd_fma() 179 vacc5x0123 = __builtin_wasm_fma_f32x4(vacc5x0123, va5, vb0123c3); in xnn_f32_gemm_ukernel_6x8s4__wasmrelaxedsimd_fma() 276 const v128_t vb0123c3 = wasm_v128_load(w + 24); in xnn_f32_gemm_ukernel_6x8s4__wasmrelaxedsimd_fma() local 279 …uiltin_wasm_fma_f32x4(vacc0x0123, wasm_v128_andnot(va0, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3); in xnn_f32_gemm_ukernel_6x8s4__wasmrelaxedsimd_fma() 280 …uiltin_wasm_fma_f32x4(vacc1x0123, wasm_v128_andnot(va1, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3); in xnn_f32_gemm_ukernel_6x8s4__wasmrelaxedsimd_fma() [all …]
|
D | 6x8s4-minmax-wasmrelaxedsimd-fma.c | 173 const v128_t vb0123c3 = wasm_v128_load(w + 24); in xnn_f32_gemm_minmax_ukernel_6x8s4__wasmrelaxedsimd_fma() local 176 vacc0x0123 = __builtin_wasm_fma_f32x4(vacc0x0123, va0, vb0123c3); in xnn_f32_gemm_minmax_ukernel_6x8s4__wasmrelaxedsimd_fma() 177 vacc1x0123 = __builtin_wasm_fma_f32x4(vacc1x0123, va1, vb0123c3); in xnn_f32_gemm_minmax_ukernel_6x8s4__wasmrelaxedsimd_fma() 178 vacc2x0123 = __builtin_wasm_fma_f32x4(vacc2x0123, va2, vb0123c3); in xnn_f32_gemm_minmax_ukernel_6x8s4__wasmrelaxedsimd_fma() 179 vacc3x0123 = __builtin_wasm_fma_f32x4(vacc3x0123, va3, vb0123c3); in xnn_f32_gemm_minmax_ukernel_6x8s4__wasmrelaxedsimd_fma() 180 vacc4x0123 = __builtin_wasm_fma_f32x4(vacc4x0123, va4, vb0123c3); in xnn_f32_gemm_minmax_ukernel_6x8s4__wasmrelaxedsimd_fma() 181 vacc5x0123 = __builtin_wasm_fma_f32x4(vacc5x0123, va5, vb0123c3); in xnn_f32_gemm_minmax_ukernel_6x8s4__wasmrelaxedsimd_fma() 278 const v128_t vb0123c3 = wasm_v128_load(w + 24); in xnn_f32_gemm_minmax_ukernel_6x8s4__wasmrelaxedsimd_fma() local 281 …uiltin_wasm_fma_f32x4(vacc0x0123, wasm_v128_andnot(va0, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3); in xnn_f32_gemm_minmax_ukernel_6x8s4__wasmrelaxedsimd_fma() 282 …uiltin_wasm_fma_f32x4(vacc1x0123, wasm_v128_andnot(va1, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3); in xnn_f32_gemm_minmax_ukernel_6x8s4__wasmrelaxedsimd_fma() [all …]
|
D | 4x8s4-wasmrelaxedsimd-fma.c | 133 const v128_t vb0123c3 = wasm_v128_load(w + 24); in xnn_f32_gemm_ukernel_4x8s4__wasmrelaxedsimd_fma() local 136 vacc0x0123 = __builtin_wasm_fma_f32x4(vacc0x0123, va0, vb0123c3); in xnn_f32_gemm_ukernel_4x8s4__wasmrelaxedsimd_fma() 137 vacc1x0123 = __builtin_wasm_fma_f32x4(vacc1x0123, va1, vb0123c3); in xnn_f32_gemm_ukernel_4x8s4__wasmrelaxedsimd_fma() 138 vacc2x0123 = __builtin_wasm_fma_f32x4(vacc2x0123, va2, vb0123c3); in xnn_f32_gemm_ukernel_4x8s4__wasmrelaxedsimd_fma() 139 vacc3x0123 = __builtin_wasm_fma_f32x4(vacc3x0123, va3, vb0123c3); in xnn_f32_gemm_ukernel_4x8s4__wasmrelaxedsimd_fma() 212 const v128_t vb0123c3 = wasm_v128_load(w + 24); in xnn_f32_gemm_ukernel_4x8s4__wasmrelaxedsimd_fma() local 215 …uiltin_wasm_fma_f32x4(vacc0x0123, wasm_v128_andnot(va0, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3); in xnn_f32_gemm_ukernel_4x8s4__wasmrelaxedsimd_fma() 216 …uiltin_wasm_fma_f32x4(vacc1x0123, wasm_v128_andnot(va1, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3); in xnn_f32_gemm_ukernel_4x8s4__wasmrelaxedsimd_fma() 217 …uiltin_wasm_fma_f32x4(vacc2x0123, wasm_v128_andnot(va2, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3); in xnn_f32_gemm_ukernel_4x8s4__wasmrelaxedsimd_fma() 218 …uiltin_wasm_fma_f32x4(vacc3x0123, wasm_v128_andnot(va3, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3); in xnn_f32_gemm_ukernel_4x8s4__wasmrelaxedsimd_fma()
|
D | 3x8s4-wasmrelaxedsimd-fma.c | 114 const v128_t vb0123c3 = wasm_v128_load(w + 24); in xnn_f32_gemm_ukernel_3x8s4__wasmrelaxedsimd_fma() local 117 vacc0x0123 = __builtin_wasm_fma_f32x4(vacc0x0123, va0, vb0123c3); in xnn_f32_gemm_ukernel_3x8s4__wasmrelaxedsimd_fma() 118 vacc1x0123 = __builtin_wasm_fma_f32x4(vacc1x0123, va1, vb0123c3); in xnn_f32_gemm_ukernel_3x8s4__wasmrelaxedsimd_fma() 119 vacc2x0123 = __builtin_wasm_fma_f32x4(vacc2x0123, va2, vb0123c3); in xnn_f32_gemm_ukernel_3x8s4__wasmrelaxedsimd_fma() 180 const v128_t vb0123c3 = wasm_v128_load(w + 24); in xnn_f32_gemm_ukernel_3x8s4__wasmrelaxedsimd_fma() local 183 …uiltin_wasm_fma_f32x4(vacc0x0123, wasm_v128_andnot(va0, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3); in xnn_f32_gemm_ukernel_3x8s4__wasmrelaxedsimd_fma() 184 …uiltin_wasm_fma_f32x4(vacc1x0123, wasm_v128_andnot(va1, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3); in xnn_f32_gemm_ukernel_3x8s4__wasmrelaxedsimd_fma() 185 …uiltin_wasm_fma_f32x4(vacc2x0123, wasm_v128_andnot(va2, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3); in xnn_f32_gemm_ukernel_3x8s4__wasmrelaxedsimd_fma()
|
D | 4x8s4-wasmsimd.c | 133 const v128_t vb0123c3 = wasm_v128_load(w + 24); in xnn_f32_gemm_ukernel_4x8s4__wasmsimd() local 136 vacc0x0123 = wasm_f32x4_add(vacc0x0123, wasm_f32x4_mul(va0, vb0123c3)); in xnn_f32_gemm_ukernel_4x8s4__wasmsimd() 137 vacc1x0123 = wasm_f32x4_add(vacc1x0123, wasm_f32x4_mul(va1, vb0123c3)); in xnn_f32_gemm_ukernel_4x8s4__wasmsimd() 138 vacc2x0123 = wasm_f32x4_add(vacc2x0123, wasm_f32x4_mul(va2, vb0123c3)); in xnn_f32_gemm_ukernel_4x8s4__wasmsimd() 139 vacc3x0123 = wasm_f32x4_add(vacc3x0123, wasm_f32x4_mul(va3, vb0123c3)); in xnn_f32_gemm_ukernel_4x8s4__wasmsimd() 212 const v128_t vb0123c3 = wasm_v128_load(w + 24); in xnn_f32_gemm_ukernel_4x8s4__wasmsimd() local 215 …4_add(vacc0x0123, wasm_f32x4_mul(wasm_v128_andnot(va0, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3)); in xnn_f32_gemm_ukernel_4x8s4__wasmsimd() 216 …4_add(vacc1x0123, wasm_f32x4_mul(wasm_v128_andnot(va1, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3)); in xnn_f32_gemm_ukernel_4x8s4__wasmsimd() 217 …4_add(vacc2x0123, wasm_f32x4_mul(wasm_v128_andnot(va2, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3)); in xnn_f32_gemm_ukernel_4x8s4__wasmsimd() 218 …4_add(vacc3x0123, wasm_f32x4_mul(wasm_v128_andnot(va3, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3)); in xnn_f32_gemm_ukernel_4x8s4__wasmsimd()
|
D | 4x8s4-relu-wasmrelaxedsimd-fma.c | 133 const v128_t vb0123c3 = wasm_v128_load(w + 24); in xnn_f32_gemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma() local 136 vacc0x0123 = __builtin_wasm_fma_f32x4(vacc0x0123, va0, vb0123c3); in xnn_f32_gemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma() 137 vacc1x0123 = __builtin_wasm_fma_f32x4(vacc1x0123, va1, vb0123c3); in xnn_f32_gemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma() 138 vacc2x0123 = __builtin_wasm_fma_f32x4(vacc2x0123, va2, vb0123c3); in xnn_f32_gemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma() 139 vacc3x0123 = __builtin_wasm_fma_f32x4(vacc3x0123, va3, vb0123c3); in xnn_f32_gemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma() 212 const v128_t vb0123c3 = wasm_v128_load(w + 24); in xnn_f32_gemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma() local 215 …uiltin_wasm_fma_f32x4(vacc0x0123, wasm_v128_andnot(va0, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3); in xnn_f32_gemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma() 216 …uiltin_wasm_fma_f32x4(vacc1x0123, wasm_v128_andnot(va1, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3); in xnn_f32_gemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma() 217 …uiltin_wasm_fma_f32x4(vacc2x0123, wasm_v128_andnot(va2, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3); in xnn_f32_gemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma() 218 …uiltin_wasm_fma_f32x4(vacc3x0123, wasm_v128_andnot(va3, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3); in xnn_f32_gemm_relu_ukernel_4x8s4__wasmrelaxedsimd_fma()
|
D | 5x8s4-minmax-wasmrelaxedsimd.c | 154 const v128_t vb0123c3 = wasm_v128_load(w + 24); in xnn_f32_gemm_minmax_ukernel_5x8s4__wasmrelaxedsimd() local 157 vacc0x0123 = wasm_f32x4_add(vacc0x0123, wasm_f32x4_mul(va0, vb0123c3)); in xnn_f32_gemm_minmax_ukernel_5x8s4__wasmrelaxedsimd() 158 vacc1x0123 = wasm_f32x4_add(vacc1x0123, wasm_f32x4_mul(va1, vb0123c3)); in xnn_f32_gemm_minmax_ukernel_5x8s4__wasmrelaxedsimd() 159 vacc2x0123 = wasm_f32x4_add(vacc2x0123, wasm_f32x4_mul(va2, vb0123c3)); in xnn_f32_gemm_minmax_ukernel_5x8s4__wasmrelaxedsimd() 160 vacc3x0123 = wasm_f32x4_add(vacc3x0123, wasm_f32x4_mul(va3, vb0123c3)); in xnn_f32_gemm_minmax_ukernel_5x8s4__wasmrelaxedsimd() 161 vacc4x0123 = wasm_f32x4_add(vacc4x0123, wasm_f32x4_mul(va4, vb0123c3)); in xnn_f32_gemm_minmax_ukernel_5x8s4__wasmrelaxedsimd() 246 const v128_t vb0123c3 = wasm_v128_load(w + 24); in xnn_f32_gemm_minmax_ukernel_5x8s4__wasmrelaxedsimd() local 249 …4_add(vacc0x0123, wasm_f32x4_mul(wasm_v128_andnot(va0, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3)); in xnn_f32_gemm_minmax_ukernel_5x8s4__wasmrelaxedsimd() 250 …4_add(vacc1x0123, wasm_f32x4_mul(wasm_v128_andnot(va1, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3)); in xnn_f32_gemm_minmax_ukernel_5x8s4__wasmrelaxedsimd() 251 …4_add(vacc2x0123, wasm_f32x4_mul(wasm_v128_andnot(va2, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3)); in xnn_f32_gemm_minmax_ukernel_5x8s4__wasmrelaxedsimd() [all …]
|
D | 5x8s4-minmax-wasmsimd-arm.c | 154 const v128_t vb0123c3 = wasm_v128_load(w + 24); in xnn_f32_gemm_minmax_ukernel_5x8s4__wasmsimd_arm() local 157 vacc0x0123 = wasm_f32x4_add(vacc0x0123, wasm_f32x4_mul(va0, vb0123c3)); in xnn_f32_gemm_minmax_ukernel_5x8s4__wasmsimd_arm() 158 vacc1x0123 = wasm_f32x4_add(vacc1x0123, wasm_f32x4_mul(va1, vb0123c3)); in xnn_f32_gemm_minmax_ukernel_5x8s4__wasmsimd_arm() 159 vacc2x0123 = wasm_f32x4_add(vacc2x0123, wasm_f32x4_mul(va2, vb0123c3)); in xnn_f32_gemm_minmax_ukernel_5x8s4__wasmsimd_arm() 160 vacc3x0123 = wasm_f32x4_add(vacc3x0123, wasm_f32x4_mul(va3, vb0123c3)); in xnn_f32_gemm_minmax_ukernel_5x8s4__wasmsimd_arm() 161 vacc4x0123 = wasm_f32x4_add(vacc4x0123, wasm_f32x4_mul(va4, vb0123c3)); in xnn_f32_gemm_minmax_ukernel_5x8s4__wasmsimd_arm() 246 const v128_t vb0123c3 = wasm_v128_load(w + 24); in xnn_f32_gemm_minmax_ukernel_5x8s4__wasmsimd_arm() local 249 …4_add(vacc0x0123, wasm_f32x4_mul(wasm_v128_andnot(va0, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3)); in xnn_f32_gemm_minmax_ukernel_5x8s4__wasmsimd_arm() 250 …4_add(vacc1x0123, wasm_f32x4_mul(wasm_v128_andnot(va1, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3)); in xnn_f32_gemm_minmax_ukernel_5x8s4__wasmsimd_arm() 251 …4_add(vacc2x0123, wasm_f32x4_mul(wasm_v128_andnot(va2, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3)); in xnn_f32_gemm_minmax_ukernel_5x8s4__wasmsimd_arm() [all …]
|
D | 5x8s4-relu-wasmsimd.c | 152 const v128_t vb0123c3 = wasm_v128_load(w + 24); in xnn_f32_gemm_relu_ukernel_5x8s4__wasmsimd() local 155 vacc0x0123 = wasm_f32x4_add(vacc0x0123, wasm_f32x4_mul(va0, vb0123c3)); in xnn_f32_gemm_relu_ukernel_5x8s4__wasmsimd() 156 vacc1x0123 = wasm_f32x4_add(vacc1x0123, wasm_f32x4_mul(va1, vb0123c3)); in xnn_f32_gemm_relu_ukernel_5x8s4__wasmsimd() 157 vacc2x0123 = wasm_f32x4_add(vacc2x0123, wasm_f32x4_mul(va2, vb0123c3)); in xnn_f32_gemm_relu_ukernel_5x8s4__wasmsimd() 158 vacc3x0123 = wasm_f32x4_add(vacc3x0123, wasm_f32x4_mul(va3, vb0123c3)); in xnn_f32_gemm_relu_ukernel_5x8s4__wasmsimd() 159 vacc4x0123 = wasm_f32x4_add(vacc4x0123, wasm_f32x4_mul(va4, vb0123c3)); in xnn_f32_gemm_relu_ukernel_5x8s4__wasmsimd() 244 const v128_t vb0123c3 = wasm_v128_load(w + 24); in xnn_f32_gemm_relu_ukernel_5x8s4__wasmsimd() local 247 …4_add(vacc0x0123, wasm_f32x4_mul(wasm_v128_andnot(va0, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3)); in xnn_f32_gemm_relu_ukernel_5x8s4__wasmsimd() 248 …4_add(vacc1x0123, wasm_f32x4_mul(wasm_v128_andnot(va1, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3)); in xnn_f32_gemm_relu_ukernel_5x8s4__wasmsimd() 249 …4_add(vacc2x0123, wasm_f32x4_mul(wasm_v128_andnot(va2, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3)); in xnn_f32_gemm_relu_ukernel_5x8s4__wasmsimd() [all …]
|
/external/XNNPACK/src/f32-gemm/gen-inc/ |
D | 5x8s4inc-minmax-wasmsimd-x86.c | 156 const v128_t vb0123c3 = wasm_v128_load(w + 24); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_x86() local 159 vacc0x0123 = wasm_f32x4_add(vacc0x0123, wasm_f32x4_mul(va0, vb0123c3)); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_x86() 160 vacc1x0123 = wasm_f32x4_add(vacc1x0123, wasm_f32x4_mul(va1, vb0123c3)); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_x86() 161 vacc2x0123 = wasm_f32x4_add(vacc2x0123, wasm_f32x4_mul(va2, vb0123c3)); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_x86() 162 vacc3x0123 = wasm_f32x4_add(vacc3x0123, wasm_f32x4_mul(va3, vb0123c3)); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_x86() 163 vacc4x0123 = wasm_f32x4_add(vacc4x0123, wasm_f32x4_mul(va4, vb0123c3)); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_x86() 248 const v128_t vb0123c3 = wasm_v128_load(w + 24); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_x86() local 251 …4_add(vacc0x0123, wasm_f32x4_mul(wasm_v128_andnot(va0, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3)); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_x86() 252 …4_add(vacc1x0123, wasm_f32x4_mul(wasm_v128_andnot(va1, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3)); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_x86() 253 …4_add(vacc2x0123, wasm_f32x4_mul(wasm_v128_andnot(va2, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3)); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_x86() [all …]
|
D | 5x8s4inc-minmax-wasmrelaxedsimd-fma.c | 156 const v128_t vb0123c3 = wasm_v128_load(w + 24); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma() local 159 vacc0x0123 = __builtin_wasm_fma_f32x4(vacc0x0123, va0, vb0123c3); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma() 160 vacc1x0123 = __builtin_wasm_fma_f32x4(vacc1x0123, va1, vb0123c3); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma() 161 vacc2x0123 = __builtin_wasm_fma_f32x4(vacc2x0123, va2, vb0123c3); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma() 162 vacc3x0123 = __builtin_wasm_fma_f32x4(vacc3x0123, va3, vb0123c3); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma() 163 vacc4x0123 = __builtin_wasm_fma_f32x4(vacc4x0123, va4, vb0123c3); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma() 248 const v128_t vb0123c3 = wasm_v128_load(w + 24); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma() local 251 …uiltin_wasm_fma_f32x4(vacc0x0123, wasm_v128_andnot(va0, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma() 252 …uiltin_wasm_fma_f32x4(vacc1x0123, wasm_v128_andnot(va1, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma() 253 …uiltin_wasm_fma_f32x4(vacc2x0123, wasm_v128_andnot(va2, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmrelaxedsimd_fma() [all …]
|
D | 5x8s4inc-minmax-wasmsimd-arm.c | 156 const v128_t vb0123c3 = wasm_v128_load(w + 24); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_arm() local 159 vacc0x0123 = wasm_f32x4_add(vacc0x0123, wasm_f32x4_mul(va0, vb0123c3)); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_arm() 160 vacc1x0123 = wasm_f32x4_add(vacc1x0123, wasm_f32x4_mul(va1, vb0123c3)); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_arm() 161 vacc2x0123 = wasm_f32x4_add(vacc2x0123, wasm_f32x4_mul(va2, vb0123c3)); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_arm() 162 vacc3x0123 = wasm_f32x4_add(vacc3x0123, wasm_f32x4_mul(va3, vb0123c3)); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_arm() 163 vacc4x0123 = wasm_f32x4_add(vacc4x0123, wasm_f32x4_mul(va4, vb0123c3)); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_arm() 248 const v128_t vb0123c3 = wasm_v128_load(w + 24); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_arm() local 251 …4_add(vacc0x0123, wasm_f32x4_mul(wasm_v128_andnot(va0, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3)); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_arm() 252 …4_add(vacc1x0123, wasm_f32x4_mul(wasm_v128_andnot(va1, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3)); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_arm() 253 …4_add(vacc2x0123, wasm_f32x4_mul(wasm_v128_andnot(va2, wasm_f32x4_eq(vb0123c3, vzero)), vb0123c3)); in xnn_f32_gemminc_minmax_ukernel_5x8s4__wasmsimd_arm() [all …]
|