/external/XNNPACK/src/f32-gemm/gen/

D | 4x2c4-wasmsimd.c | in xnn_f32_gemm_ukernel_4x2c4__wasmsimd():
     65  v128_t vacc2x1c4 = vacc0x1c4;  (local)
     90  vacc2x1c4 = wasm_f32x4_add(vacc2x1c4, wasm_f32x4_mul(va2, vb1));
    117  vacc2x1c4 = wasm_f32x4_add(vacc2x1c4, wasm_f32x4_mul(wasm_v128_andnot(va2, vmask1), vb1));
    129  wasm_v32x4_shuffle(vacc2x0c4, vacc2x1c4, 0, 4, 1, 5),
    130  wasm_v32x4_shuffle(vacc2x0c4, vacc2x1c4, 2, 6, 3, 7));

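The shuffle pair at lines 129-130 is the epilogue that folds the two per-column c4 accumulators of a row into its two output values. A minimal sketch of that reduction, assuming clang's wasm_simd128.h and -msimd128; the helper name is made up, and the second fold stage is inferred from the kernel's structure rather than shown in the matched lines:

    #include <wasm_simd128.h>

    /* Folds the two per-column c4 accumulators of one row into its two
     * output values, mirroring the shuffle/add epilogue at lines 129-130.
     * Sketch only: the generated kernels fuse the second fold with a
     * cross-row step instead of doing it per row. */
    static inline v128_t reduce_c4_pair(v128_t vacc_c0, v128_t vacc_c1) {
      /* [a0,b0,a1,b1] + [a2,b2,a3,b3] -> [a0+a2, b0+b2, a1+a3, b1+b3] */
      const v128_t vsum = wasm_f32x4_add(
          wasm_v32x4_shuffle(vacc_c0, vacc_c1, 0, 4, 1, 5),
          wasm_v32x4_shuffle(vacc_c0, vacc_c1, 2, 6, 3, 7));
      /* One more fold; lanes 0 and 1 now hold the column-0/column-1 sums. */
      return wasm_f32x4_add(
          wasm_v32x4_shuffle(vsum, vsum, 0, 1, 0, 1),
          wasm_v32x4_shuffle(vsum, vsum, 2, 3, 2, 3));
    }

The (0, 4, 1, 5) shuffle interleaves the low halves of the two accumulators and (2, 6, 3, 7) the high halves, so each add halves the reduction tree.
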
D | 4x2c4-minmax-sse.c | in xnn_f32_gemm_minmax_ukernel_4x2c4__sse():
     65  __m128 vacc2x1c4 = vacc0x1c4;  (local)
     90  vacc2x1c4 = _mm_add_ps(vacc2x1c4, _mm_mul_ps(va2, vb1));
    116  vacc2x1c4 = _mm_add_ps(vacc2x1c4, _mm_mul_ps(_mm_andnot_ps(vmask1, va2), vb1));
    123  … vacc2x01c2 = _mm_add_ps(_mm_unpacklo_ps(vacc2x0c4, vacc2x1c4), _mm_unpackhi_ps(vacc2x0c4, vacc2x1…

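The SSE variant reaches the same lane layout with unpacks: _mm_unpacklo_ps/_mm_unpackhi_ps at line 123 interleave exactly like the (0, 4, 1, 5) and (2, 6, 3, 7) shuffles above. A sketch under the same assumptions (the movelh/movehl fold is inferred, not part of the matched lines):

    #include <xmmintrin.h>

    /* SSE counterpart of reduce_c4_pair(): unpacklo/unpackhi interleave the
     * accumulators exactly like the wasm shuffles, then movelh/movehl fold
     * the two halves. Sketch only. */
    static inline __m128 reduce_c4_pair_sse(__m128 vacc_c0, __m128 vacc_c1) {
      /* [a0,b0,a1,b1] + [a2,b2,a3,b3] -> [a0+a2, b0+b2, a1+a3, b1+b3] */
      const __m128 vsum = _mm_add_ps(_mm_unpacklo_ps(vacc_c0, vacc_c1),
                                     _mm_unpackhi_ps(vacc_c0, vacc_c1));
      /* Lanes 0 and 1 of the result hold the column-0/column-1 sums. */
      return _mm_add_ps(_mm_movelh_ps(vsum, vsum), _mm_movehl_ps(vsum, vsum));
    }

One detail worth noting when comparing line 116 with its wasm twin at line 117: _mm_andnot_ps(vmask1, va2) negates its first argument, while wasm_v128_andnot(va2, vmask1) negates its second, so both compute va2 with the masked lanes cleared.
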
D | 4x2c4-relu-wasmsimd.c | in xnn_f32_gemm_relu_ukernel_4x2c4__wasmsimd():
     65  v128_t vacc2x1c4 = vacc0x1c4;  (local)
     90  vacc2x1c4 = wasm_f32x4_add(vacc2x1c4, wasm_f32x4_mul(va2, vb1));
    117  vacc2x1c4 = wasm_f32x4_add(vacc2x1c4, wasm_f32x4_mul(wasm_v128_andnot(va2, vmask1), vb1));
    129  wasm_v32x4_shuffle(vacc2x0c4, vacc2x1c4, 0, 4, 1, 5),
    130  wasm_v32x4_shuffle(vacc2x0c4, vacc2x1c4, 2, 6, 3, 7));

D | 4x2c4-minmax-wasmsimd-arm.c | in xnn_f32_gemm_minmax_ukernel_4x2c4__wasmsimd_arm():
     67  v128_t vacc2x1c4 = vacc0x1c4;  (local)
     92  vacc2x1c4 = wasm_f32x4_add(vacc2x1c4, wasm_f32x4_mul(va2, vb1));
    119  vacc2x1c4 = wasm_f32x4_add(vacc2x1c4, wasm_f32x4_mul(wasm_v128_andnot(va2, vmask1), vb1));
    131  wasm_v32x4_shuffle(vacc2x0c4, vacc2x1c4, 0, 4, 1, 5),
    132  wasm_v32x4_shuffle(vacc2x0c4, vacc2x1c4, 2, 6, 3, 7));

D | 4x2c4-minmax-wasmsimd-x86.c | in xnn_f32_gemm_minmax_ukernel_4x2c4__wasmsimd_x86():
     65  v128_t vacc2x1c4 = vacc0x1c4;  (local)
     90  vacc2x1c4 = wasm_f32x4_add(vacc2x1c4, wasm_f32x4_mul(va2, vb1));
    117  vacc2x1c4 = wasm_f32x4_add(vacc2x1c4, wasm_f32x4_mul(wasm_v128_andnot(va2, vmask1), vb1));
    129  wasm_v32x4_shuffle(vacc2x0c4, vacc2x1c4, 0, 4, 1, 5),
    130  wasm_v32x4_shuffle(vacc2x0c4, vacc2x1c4, 2, 6, 3, 7));

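Every variant above also matches one andnot line. That is the K-remainder step: on the final partial block, lanes of A past K must not contribute to the dot products. A sketch of the idiom, assuming the mask is derived from the zero-padded packed B panel; the mask setup lies outside the matched lines, so this derivation is an assumption:

    #include <wasm_simd128.h>

    /* K-remainder multiply-accumulate, as in the wasm_v128_andnot lines
     * above. Assumption: the mask flags lanes where the zero-padded B
     * panel is zero; its actual derivation is not in the matched lines. */
    static inline v128_t mad_remainder(v128_t vacc, v128_t va, v128_t vb) {
      const v128_t vmask = wasm_f32x4_eq(vb, wasm_f32x4_splat(0.0f));
      /* Clear the tail lanes of A before multiplying: past K they may hold
       * garbage, and Inf/NaN garbage times 0.0f would poison the sum. */
      return wasm_f32x4_add(vacc,
          wasm_f32x4_mul(wasm_v128_andnot(va, vmask), vb));
    }
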
/external/XNNPACK/src/f32-igemm/gen/

D | 4x2c4-relu-wasmsimd.c | in xnn_f32_igemm_relu_ukernel_4x2c4__wasmsimd():
     63  v128_t vacc2x1c4 = vacc0x1c4;  (local)
    112  vacc2x1c4 = wasm_f32x4_add(vacc2x1c4, wasm_f32x4_mul(va2, vb1));
    135  vacc2x1c4 = wasm_f32x4_add(vacc2x1c4, wasm_f32x4_mul(wasm_v128_andnot(va2, vmask1), vb1));
    149  wasm_v32x4_shuffle(vacc2x0c4, vacc2x1c4, 0, 4, 1, 5),
    150  wasm_v32x4_shuffle(vacc2x0c4, vacc2x1c4, 2, 6, 3, 7));

D | 4x2c4-wasmsimd.c | in xnn_f32_igemm_ukernel_4x2c4__wasmsimd():
     63  v128_t vacc2x1c4 = vacc0x1c4;  (local)
    112  vacc2x1c4 = wasm_f32x4_add(vacc2x1c4, wasm_f32x4_mul(va2, vb1));
    135  vacc2x1c4 = wasm_f32x4_add(vacc2x1c4, wasm_f32x4_mul(wasm_v128_andnot(va2, vmask1), vb1));
    149  wasm_v32x4_shuffle(vacc2x0c4, vacc2x1c4, 0, 4, 1, 5),
    150  wasm_v32x4_shuffle(vacc2x0c4, vacc2x1c4, 2, 6, 3, 7));

D | 4x2c4-minmax-wasmsimd-x86.c | in xnn_f32_igemm_minmax_ukernel_4x2c4__wasmsimd_x86():
     63  v128_t vacc2x1c4 = vacc0x1c4;  (local)
    112  vacc2x1c4 = wasm_f32x4_add(vacc2x1c4, wasm_f32x4_mul(va2, vb1));
    135  vacc2x1c4 = wasm_f32x4_add(vacc2x1c4, wasm_f32x4_mul(wasm_v128_andnot(va2, vmask1), vb1));
    149  wasm_v32x4_shuffle(vacc2x0c4, vacc2x1c4, 0, 4, 1, 5),
    150  wasm_v32x4_shuffle(vacc2x0c4, vacc2x1c4, 2, 6, 3, 7));

D | 4x2c4-minmax-wasmsimd-arm.c | in xnn_f32_igemm_minmax_ukernel_4x2c4__wasmsimd_arm():
     65  v128_t vacc2x1c4 = vacc0x1c4;  (local)
    114  vacc2x1c4 = wasm_f32x4_add(vacc2x1c4, wasm_f32x4_mul(va2, vb1));
    137  vacc2x1c4 = wasm_f32x4_add(vacc2x1c4, wasm_f32x4_mul(wasm_v128_andnot(va2, vmask1), vb1));
    151  wasm_v32x4_shuffle(vacc2x0c4, vacc2x1c4, 0, 4, 1, 5),
    152  wasm_v32x4_shuffle(vacc2x0c4, vacc2x1c4, 2, 6, 3, 7));

D | 4x2c4-minmax-sse.c | in xnn_f32_igemm_minmax_ukernel_4x2c4__sse():
     63  __m128 vacc2x1c4 = vacc0x1c4;  (local)
    112  vacc2x1c4 = _mm_add_ps(vacc2x1c4, _mm_mul_ps(va2, vb1));
    134  vacc2x1c4 = _mm_add_ps(vacc2x1c4, _mm_mul_ps(_mm_andnot_ps(vmask1, va2), vb1));
    143  … vacc2x01c2 = _mm_add_ps(_mm_unpacklo_ps(vacc2x0c4, vacc2x1c4), _mm_unpackhi_ps(vacc2x0c4, vacc2x1…

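Finally, the definitions flagged local (v128_t vacc2x1c4 = vacc0x1c4; and the SSE equivalent) are the accumulator setup: rows 1 through 3 start as copies of row 0, which is loaded from the packed bias. Since the epilogue sums all four c4 lanes, the bias can occupy only one lane. A sketch, assuming a [bias | K-blocks] weight packing that the matched lines do not themselves show:

    #include <wasm_simd128.h>

    /* Accumulator setup behind the definitions flagged "local" above. The
     * bias sits in lane 0 only, because the epilogue sums all four c4
     * lanes; splatting it would count it four times. The [bias0, bias1]
     * head of the packed weights 'w' is an assumption about the layout. */
    static void init_accumulators(const float* w, v128_t vacc[4][2]) {
      vacc[0][0] = wasm_f32x4_make(w[0], 0.0f, 0.0f, 0.0f);  /* column 0 */
      vacc[0][1] = wasm_f32x4_make(w[1], 0.0f, 0.0f, 0.0f);  /* column 1 */
      for (int i = 1; i < 4; i++) {
        vacc[i][0] = vacc[0][0];  /* e.g. vacc2x0c4 = vacc0x0c4 */
        vacc[i][1] = vacc[0][1];  /* e.g. vacc2x1c4 = vacc0x1c4 */
      }
    }
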