/external/XNNPACK/src/qs8-requantization/ |
D | precise-sse4.c | matches in xnn_qs8_requantize_precise__sse4():
      46  const __m128i vrounding = _mm_set1_epi64x(rounding);  (local)
      74  const __m128i x_abs_scaled02 = _mm_srl_epi64(_mm_add_epi64(x_absmul02, vrounding), vshiftlo);
      75  const __m128i x_abs_scaled13 = _mm_srl_epi32(_mm_add_epi64(x_absmul13, vrounding), vshifthi);
      76  const __m128i y_abs_scaled02 = _mm_srl_epi64(_mm_add_epi64(y_absmul02, vrounding), vshiftlo);
      77  const __m128i y_abs_scaled13 = _mm_srl_epi32(_mm_add_epi64(y_absmul13, vrounding), vshifthi);
      78  const __m128i z_abs_scaled02 = _mm_srl_epi64(_mm_add_epi64(z_absmul02, vrounding), vshiftlo);
      79  const __m128i z_abs_scaled13 = _mm_srl_epi32(_mm_add_epi64(z_absmul13, vrounding), vshifthi);
      80  const __m128i w_abs_scaled02 = _mm_srl_epi64(_mm_add_epi64(w_absmul02, vrounding), vshiftlo);
      81  const __m128i w_abs_scaled13 = _mm_srl_epi32(_mm_add_epi64(w_absmul13, vrounding), vshifthi);
|
D | precise-ssse3.c | matches in xnn_qs8_requantize_precise__ssse3():
      45  const __m128i vrounding = _mm_set1_epi64x(rounding);  (local)
      73  const __m128i x_abs_scaled02 = _mm_srl_epi64(_mm_add_epi64(x_absmul02, vrounding), vshift);
      74  const __m128i x_abs_scaled13 = _mm_srl_epi64(_mm_add_epi64(x_absmul13, vrounding), vshift);
      75  const __m128i y_abs_scaled02 = _mm_srl_epi64(_mm_add_epi64(y_absmul02, vrounding), vshift);
      76  const __m128i y_abs_scaled13 = _mm_srl_epi64(_mm_add_epi64(y_absmul13, vrounding), vshift);
      77  const __m128i z_abs_scaled02 = _mm_srl_epi64(_mm_add_epi64(z_absmul02, vrounding), vshift);
      78  const __m128i z_abs_scaled13 = _mm_srl_epi64(_mm_add_epi64(z_absmul13, vrounding), vshift);
      79  const __m128i w_abs_scaled02 = _mm_srl_epi64(_mm_add_epi64(w_absmul02, vrounding), vshift);
      80  const __m128i w_abs_scaled13 = _mm_srl_epi64(_mm_add_epi64(w_absmul13, vrounding), vshift);
|
D | precise-sse2.c | matches in xnn_qs8_requantize_precise__sse2():
      45  const __m128i vrounding = _mm_set1_epi64x(rounding);  (local)
      78  const __m128i x_abs_scaled02 = _mm_srl_epi64(_mm_add_epi64(x_absmul02, vrounding), vshift);
      79  const __m128i x_abs_scaled13 = _mm_srl_epi64(_mm_add_epi64(x_absmul13, vrounding), vshift);
      80  const __m128i y_abs_scaled02 = _mm_srl_epi64(_mm_add_epi64(y_absmul02, vrounding), vshift);
      81  const __m128i y_abs_scaled13 = _mm_srl_epi64(_mm_add_epi64(y_absmul13, vrounding), vshift);
      82  const __m128i z_abs_scaled02 = _mm_srl_epi64(_mm_add_epi64(z_absmul02, vrounding), vshift);
      83  const __m128i z_abs_scaled13 = _mm_srl_epi64(_mm_add_epi64(z_absmul13, vrounding), vshift);
      84  const __m128i w_abs_scaled02 = _mm_srl_epi64(_mm_add_epi64(w_absmul02, vrounding), vshift);
      85  const __m128i w_abs_scaled13 = _mm_srl_epi64(_mm_add_epi64(w_absmul13, vrounding), vshift);
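
Note: the precise requantization kernels above (and the qu8 ones below) all vectorize the same per-element step: the magnitude of the input is widened into a 32x32->64-bit product with a fixed-point multiplier, vrounding = 2^(shift-1) is added, the sum is shifted right by shift, and the sign is restored (round-half-away-from-zero). A minimal scalar sketch of that step, assuming scale = multiplier * 2^-shift; the helper name and clamping bounds are illustrative, not XNNPACK's API.

    #include <stdint.h>

    /* Scalar model of the precise requantization step the SSE kernels above
       vectorize. Assumes value > INT32_MIN and 1 <= shift <= 63. */
    static inline int8_t requantize_precise(
        int32_t value, uint32_t multiplier, uint32_t shift, int8_t zero_point)
    {
      const uint64_t rounding = UINT64_C(1) << (shift - 1);   /* the vrounding constant */
      const uint32_t abs_value = (uint32_t) (value >= 0 ? value : -value);
      const uint64_t abs_product = (uint64_t) abs_value * multiplier;
      /* add rounding, then shift: the _mm_srl_epi64(_mm_add_epi64(...)) pair above */
      const uint32_t abs_scaled = (uint32_t) ((abs_product + rounding) >> shift);
      const int32_t scaled = value >= 0 ? (int32_t) abs_scaled : -(int32_t) abs_scaled;
      int32_t q = scaled + (int32_t) zero_point;
      if (q < -128) q = -128;   /* qmin */
      if (q > 127) q = 127;     /* qmax */
      return (int8_t) q;
    }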
|
/external/XNNPACK/src/qu8-requantization/ |
D | precise-sse4.c | matches in xnn_qu8_requantize_precise__sse4():
      46  const __m128i vrounding = _mm_set1_epi64x(rounding);  (local)
      74  const __m128i x_abs_scaled02 = _mm_srl_epi64(_mm_add_epi64(x_absmul02, vrounding), vshiftlo);
      75  const __m128i x_abs_scaled13 = _mm_srl_epi32(_mm_add_epi64(x_absmul13, vrounding), vshifthi);
      76  const __m128i y_abs_scaled02 = _mm_srl_epi64(_mm_add_epi64(y_absmul02, vrounding), vshiftlo);
      77  const __m128i y_abs_scaled13 = _mm_srl_epi32(_mm_add_epi64(y_absmul13, vrounding), vshifthi);
      78  const __m128i z_abs_scaled02 = _mm_srl_epi64(_mm_add_epi64(z_absmul02, vrounding), vshiftlo);
      79  const __m128i z_abs_scaled13 = _mm_srl_epi32(_mm_add_epi64(z_absmul13, vrounding), vshifthi);
      80  const __m128i w_abs_scaled02 = _mm_srl_epi64(_mm_add_epi64(w_absmul02, vrounding), vshiftlo);
      81  const __m128i w_abs_scaled13 = _mm_srl_epi32(_mm_add_epi64(w_absmul13, vrounding), vshifthi);
|
D | precise-ssse3.c | matches in xnn_qu8_requantize_precise__ssse3():
      45  const __m128i vrounding = _mm_set1_epi64x(rounding);  (local)
      73  const __m128i x_abs_scaled02 = _mm_srl_epi64(_mm_add_epi64(x_absmul02, vrounding), vshift);
      74  const __m128i x_abs_scaled13 = _mm_srl_epi64(_mm_add_epi64(x_absmul13, vrounding), vshift);
      75  const __m128i y_abs_scaled02 = _mm_srl_epi64(_mm_add_epi64(y_absmul02, vrounding), vshift);
      76  const __m128i y_abs_scaled13 = _mm_srl_epi64(_mm_add_epi64(y_absmul13, vrounding), vshift);
      77  const __m128i z_abs_scaled02 = _mm_srl_epi64(_mm_add_epi64(z_absmul02, vrounding), vshift);
      78  const __m128i z_abs_scaled13 = _mm_srl_epi64(_mm_add_epi64(z_absmul13, vrounding), vshift);
      79  const __m128i w_abs_scaled02 = _mm_srl_epi64(_mm_add_epi64(w_absmul02, vrounding), vshift);
      80  const __m128i w_abs_scaled13 = _mm_srl_epi64(_mm_add_epi64(w_absmul13, vrounding), vshift);
|
D | precise-sse2.c | matches in xnn_qu8_requantize_precise__sse2():
      45  const __m128i vrounding = _mm_set1_epi64x(rounding);  (local)
      78  const __m128i x_abs_scaled02 = _mm_srl_epi64(_mm_add_epi64(x_absmul02, vrounding), vshift);
      79  const __m128i x_abs_scaled13 = _mm_srl_epi64(_mm_add_epi64(x_absmul13, vrounding), vshift);
      80  const __m128i y_abs_scaled02 = _mm_srl_epi64(_mm_add_epi64(y_absmul02, vrounding), vshift);
      81  const __m128i y_abs_scaled13 = _mm_srl_epi64(_mm_add_epi64(y_absmul13, vrounding), vshift);
      82  const __m128i z_abs_scaled02 = _mm_srl_epi64(_mm_add_epi64(z_absmul02, vrounding), vshift);
      83  const __m128i z_abs_scaled13 = _mm_srl_epi64(_mm_add_epi64(z_absmul13, vrounding), vshift);
      84  const __m128i w_abs_scaled02 = _mm_srl_epi64(_mm_add_epi64(w_absmul02, vrounding), vshift);
      85  const __m128i w_abs_scaled13 = _mm_srl_epi64(_mm_add_epi64(w_absmul13, vrounding), vshift);
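
Note: the vshift operand in the ssse3/sse2 variants is not an immediate. _mm_srl_epi64 takes its shift count from the low 64 bits of its second XMM operand, so the runtime shift is first moved into a register with _mm_cvtsi32_si128. A hypothetical helper showing that idiom (the helper name is illustrative):

    #include <emmintrin.h>   /* SSE2 */
    #include <stdint.h>

    /* Rounding right shift of both 64-bit lanes by a runtime count: the
       pattern behind every *_abs_scaled* line above. Assumes shift >= 1. */
    static inline __m128i rounding_shift_u64(__m128i v, uint32_t shift)
    {
      const __m128i vshift = _mm_cvtsi32_si128((int) shift);   /* count in low bits */
      const __m128i vrounding = _mm_set1_epi64x(INT64_C(1) << (shift - 1));
      return _mm_srl_epi64(_mm_add_epi64(v, vrounding), vshift);
    }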
|
/external/XNNPACK/src/qs8-gavgpool/gen/ |
D | 7x-minmax-sse41-c24-acc2.c | matches in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c24_acc2():
      58   const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse2.rounding);  (local)
      147  const __m128i vabsout02 = _mm_srl_epi64(_mm_add_epi64(vabsprod02, vrounding), vshift);
      148  const __m128i vabsout13 = _mm_srl_epi64(_mm_add_epi64(vabsprod13, vrounding), vshift);
      149  const __m128i vabsout46 = _mm_srl_epi64(_mm_add_epi64(vabsprod46, vrounding), vshift);
      150  const __m128i vabsout57 = _mm_srl_epi64(_mm_add_epi64(vabsprod57, vrounding), vshift);
      151  const __m128i vabsout8A = _mm_srl_epi64(_mm_add_epi64(vabsprod8A, vrounding), vshift);
      152  const __m128i vabsout9B = _mm_srl_epi64(_mm_add_epi64(vabsprod9B, vrounding), vshift);
      153  const __m128i vabsoutCE = _mm_srl_epi64(_mm_add_epi64(vabsprodCE, vrounding), vshift);
      154  const __m128i vabsoutDF = _mm_srl_epi64(_mm_add_epi64(vabsprodDF, vrounding), vshift);
      155  const __m128i vabsoutGI = _mm_srl_epi64(_mm_add_epi64(vabsprodGI, vrounding), vshift);
      [all …]
|
D | 7x-minmax-wasmsimd-c24-acc2.c | matches in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c24_acc2():
      58   const v128_t vrounding = wasm_v128_load(params->wasmsimd.rounding);  (local)
      160  const v128_t vabsout01 = wasm_u64x2_shr(wasm_i64x2_add(vabsprod01, vrounding), vshift);
      161  const v128_t vabsout23 = wasm_u64x2_shr(wasm_i64x2_add(vabsprod23, vrounding), vshift);
      162  const v128_t vabsout45 = wasm_u64x2_shr(wasm_i64x2_add(vabsprod45, vrounding), vshift);
      163  const v128_t vabsout67 = wasm_u64x2_shr(wasm_i64x2_add(vabsprod67, vrounding), vshift);
      164  const v128_t vabsout89 = wasm_u64x2_shr(wasm_i64x2_add(vabsprod89, vrounding), vshift);
      165  const v128_t vabsoutAB = wasm_u64x2_shr(wasm_i64x2_add(vabsprodAB, vrounding), vshift);
      166  const v128_t vabsoutCD = wasm_u64x2_shr(wasm_i64x2_add(vabsprodCD, vrounding), vshift);
      167  const v128_t vabsoutEF = wasm_u64x2_shr(wasm_i64x2_add(vabsprodEF, vrounding), vshift);
      168  const v128_t vabsoutGH = wasm_u64x2_shr(wasm_i64x2_add(vabsprodGH, vrounding), vshift);
      [all …]
|
D | 7x-minmax-sse41-c16-acc2.c | matches in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c16_acc2():
      58   const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse2.rounding);  (local)
      124  const __m128i vabsout02 = _mm_srl_epi64(_mm_add_epi64(vabsprod02, vrounding), vshift);
      125  const __m128i vabsout13 = _mm_srl_epi64(_mm_add_epi64(vabsprod13, vrounding), vshift);
      126  const __m128i vabsout46 = _mm_srl_epi64(_mm_add_epi64(vabsprod46, vrounding), vshift);
      127  const __m128i vabsout57 = _mm_srl_epi64(_mm_add_epi64(vabsprod57, vrounding), vshift);
      128  const __m128i vabsout8A = _mm_srl_epi64(_mm_add_epi64(vabsprod8A, vrounding), vshift);
      129  const __m128i vabsout9B = _mm_srl_epi64(_mm_add_epi64(vabsprod9B, vrounding), vshift);
      130  const __m128i vabsoutCE = _mm_srl_epi64(_mm_add_epi64(vabsprodCE, vrounding), vshift);
      131  const __m128i vabsoutDF = _mm_srl_epi64(_mm_add_epi64(vabsprodDF, vrounding), vshift);
      201  const __m128i vabsout02 = _mm_srl_epi64(_mm_add_epi64(vabsprod02, vrounding), vshift);
      [all …]
|
D | 7x-minmax-wasmsimd-c16-acc2.c | matches in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c16_acc2():
      58   const v128_t vrounding = wasm_v128_load(params->wasmsimd.rounding);  (local)
      133  const v128_t vabsout01 = wasm_u64x2_shr(wasm_i64x2_add(vabsprod01, vrounding), vshift);
      134  const v128_t vabsout23 = wasm_u64x2_shr(wasm_i64x2_add(vabsprod23, vrounding), vshift);
      135  const v128_t vabsout45 = wasm_u64x2_shr(wasm_i64x2_add(vabsprod45, vrounding), vshift);
      136  const v128_t vabsout67 = wasm_u64x2_shr(wasm_i64x2_add(vabsprod67, vrounding), vshift);
      137  const v128_t vabsout89 = wasm_u64x2_shr(wasm_i64x2_add(vabsprod89, vrounding), vshift);
      138  const v128_t vabsoutAB = wasm_u64x2_shr(wasm_i64x2_add(vabsprodAB, vrounding), vshift);
      139  const v128_t vabsoutCD = wasm_u64x2_shr(wasm_i64x2_add(vabsprodCD, vrounding), vshift);
      140  const v128_t vabsoutEF = wasm_u64x2_shr(wasm_i64x2_add(vabsprodEF, vrounding), vshift);
      211  const v128_t vabsout01 = wasm_u64x2_shr(wasm_i64x2_add(vabsprod01, vrounding), vshift);
      [all …]
|
D | 7x-minmax-ssse3-c24-acc2.c | matches in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c24_acc2():
      58   const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse2.rounding);  (local)
      171  const __m128i vabsout02 = _mm_srl_epi64(_mm_add_epi64(vabsprod02, vrounding), vshift);
      172  const __m128i vabsout13 = _mm_srl_epi64(_mm_add_epi64(vabsprod13, vrounding), vshift);
      173  const __m128i vabsout46 = _mm_srl_epi64(_mm_add_epi64(vabsprod46, vrounding), vshift);
      174  const __m128i vabsout57 = _mm_srl_epi64(_mm_add_epi64(vabsprod57, vrounding), vshift);
      175  const __m128i vabsout8A = _mm_srl_epi64(_mm_add_epi64(vabsprod8A, vrounding), vshift);
      176  const __m128i vabsout9B = _mm_srl_epi64(_mm_add_epi64(vabsprod9B, vrounding), vshift);
      177  const __m128i vabsoutCE = _mm_srl_epi64(_mm_add_epi64(vabsprodCE, vrounding), vshift);
      178  const __m128i vabsoutDF = _mm_srl_epi64(_mm_add_epi64(vabsprodDF, vrounding), vshift);
      179  const __m128i vabsoutGI = _mm_srl_epi64(_mm_add_epi64(vabsprodGI, vrounding), vshift);
      [all …]
|
D | 7x-minmax-sse2-c24-acc2.c | matches in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c24_acc2():
      58   const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse2.rounding);  (local)
      178  const __m128i vabsout02 = _mm_srl_epi64(_mm_add_epi64(vabsprod02, vrounding), vshift);
      179  const __m128i vabsout13 = _mm_srl_epi64(_mm_add_epi64(vabsprod13, vrounding), vshift);
      180  const __m128i vabsout46 = _mm_srl_epi64(_mm_add_epi64(vabsprod46, vrounding), vshift);
      181  const __m128i vabsout57 = _mm_srl_epi64(_mm_add_epi64(vabsprod57, vrounding), vshift);
      182  const __m128i vabsout8A = _mm_srl_epi64(_mm_add_epi64(vabsprod8A, vrounding), vshift);
      183  const __m128i vabsout9B = _mm_srl_epi64(_mm_add_epi64(vabsprod9B, vrounding), vshift);
      184  const __m128i vabsoutCE = _mm_srl_epi64(_mm_add_epi64(vabsprodCE, vrounding), vshift);
      185  const __m128i vabsoutDF = _mm_srl_epi64(_mm_add_epi64(vabsprodDF, vrounding), vshift);
      186  const __m128i vabsoutGI = _mm_srl_epi64(_mm_add_epi64(vabsprodGI, vrounding), vshift);
      [all …]
|
D | 7x-minmax-wasmsimd-c8-acc2.c | matches in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c8_acc2():
      58   const v128_t vrounding = wasm_v128_load(params->wasmsimd.rounding);  (local)
      106  const v128_t vabsout01 = wasm_u64x2_shr(wasm_i64x2_add(vabsprod01, vrounding), vshift);
      107  const v128_t vabsout23 = wasm_u64x2_shr(wasm_i64x2_add(vabsprod23, vrounding), vshift);
      108  const v128_t vabsout45 = wasm_u64x2_shr(wasm_i64x2_add(vabsprod45, vrounding), vshift);
      109  const v128_t vabsout67 = wasm_u64x2_shr(wasm_i64x2_add(vabsprod67, vrounding), vshift);
      175  const v128_t vabsout01 = wasm_u64x2_shr(wasm_i64x2_add(vabsprod01, vrounding), vshift);
      176  const v128_t vabsout23 = wasm_u64x2_shr(wasm_i64x2_add(vabsprod23, vrounding), vshift);
      177  const v128_t vabsout45 = wasm_u64x2_shr(wasm_i64x2_add(vabsprod45, vrounding), vshift);
      178  const v128_t vabsout67 = wasm_u64x2_shr(wasm_i64x2_add(vabsprod67, vrounding), vshift);
|
D | 7x-minmax-sse41-c8-acc2.c | matches in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c8_acc2():
      58   const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse2.rounding);  (local)
      101  const __m128i vabsout02 = _mm_srl_epi64(_mm_add_epi64(vabsprod02, vrounding), vshift);
      102  const __m128i vabsout13 = _mm_srl_epi64(_mm_add_epi64(vabsprod13, vrounding), vshift);
      103  const __m128i vabsout46 = _mm_srl_epi64(_mm_add_epi64(vabsprod46, vrounding), vshift);
      104  const __m128i vabsout57 = _mm_srl_epi64(_mm_add_epi64(vabsprod57, vrounding), vshift);
      168  const __m128i vabsout02 = _mm_srl_epi64(_mm_add_epi64(vabsprod02, vrounding), vshift);
      169  const __m128i vabsout13 = _mm_srl_epi64(_mm_add_epi64(vabsprod13, vrounding), vshift);
      170  const __m128i vabsout46 = _mm_srl_epi64(_mm_add_epi64(vabsprod46, vrounding), vshift);
      171  const __m128i vabsout57 = _mm_srl_epi64(_mm_add_epi64(vabsprod57, vrounding), vshift);
|
D | 7x-minmax-ssse3-c16-acc2.c | matches in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c16_acc2():
      58   const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse2.rounding);  (local)
      140  const __m128i vabsout02 = _mm_srl_epi64(_mm_add_epi64(vabsprod02, vrounding), vshift);
      141  const __m128i vabsout13 = _mm_srl_epi64(_mm_add_epi64(vabsprod13, vrounding), vshift);
      142  const __m128i vabsout46 = _mm_srl_epi64(_mm_add_epi64(vabsprod46, vrounding), vshift);
      143  const __m128i vabsout57 = _mm_srl_epi64(_mm_add_epi64(vabsprod57, vrounding), vshift);
      144  const __m128i vabsout8A = _mm_srl_epi64(_mm_add_epi64(vabsprod8A, vrounding), vshift);
      145  const __m128i vabsout9B = _mm_srl_epi64(_mm_add_epi64(vabsprod9B, vrounding), vshift);
      146  const __m128i vabsoutCE = _mm_srl_epi64(_mm_add_epi64(vabsprodCE, vrounding), vshift);
      147  const __m128i vabsoutDF = _mm_srl_epi64(_mm_add_epi64(vabsprodDF, vrounding), vshift);
      234  const __m128i vabsout02 = _mm_srl_epi64(_mm_add_epi64(vabsprod02, vrounding), vshift);
      [all …]
|
D | 7x-minmax-sse2-c16-acc2.c | matches in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c16_acc2():
      58   const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse2.rounding);  (local)
      145  const __m128i vabsout02 = _mm_srl_epi64(_mm_add_epi64(vabsprod02, vrounding), vshift);
      146  const __m128i vabsout13 = _mm_srl_epi64(_mm_add_epi64(vabsprod13, vrounding), vshift);
      147  const __m128i vabsout46 = _mm_srl_epi64(_mm_add_epi64(vabsprod46, vrounding), vshift);
      148  const __m128i vabsout57 = _mm_srl_epi64(_mm_add_epi64(vabsprod57, vrounding), vshift);
      149  const __m128i vabsout8A = _mm_srl_epi64(_mm_add_epi64(vabsprod8A, vrounding), vshift);
      150  const __m128i vabsout9B = _mm_srl_epi64(_mm_add_epi64(vabsprod9B, vrounding), vshift);
      151  const __m128i vabsoutCE = _mm_srl_epi64(_mm_add_epi64(vabsprodCE, vrounding), vshift);
      152  const __m128i vabsoutDF = _mm_srl_epi64(_mm_add_epi64(vabsprodDF, vrounding), vshift);
      242  const __m128i vabsout02 = _mm_srl_epi64(_mm_add_epi64(vabsprod02, vrounding), vshift);
      [all …]
|
D | 7x-minmax-ssse3-c8-acc2.c | matches in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c8_acc2():
      58   const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse2.rounding);  (local)
      109  const __m128i vabsout02 = _mm_srl_epi64(_mm_add_epi64(vabsprod02, vrounding), vshift);
      110  const __m128i vabsout13 = _mm_srl_epi64(_mm_add_epi64(vabsprod13, vrounding), vshift);
      111  const __m128i vabsout46 = _mm_srl_epi64(_mm_add_epi64(vabsprod46, vrounding), vshift);
      112  const __m128i vabsout57 = _mm_srl_epi64(_mm_add_epi64(vabsprod57, vrounding), vshift);
      189  const __m128i vabsout02 = _mm_srl_epi64(_mm_add_epi64(vabsprod02, vrounding), vshift);
      190  const __m128i vabsout13 = _mm_srl_epi64(_mm_add_epi64(vabsprod13, vrounding), vshift);
      191  const __m128i vabsout46 = _mm_srl_epi64(_mm_add_epi64(vabsprod46, vrounding), vshift);
      192  const __m128i vabsout57 = _mm_srl_epi64(_mm_add_epi64(vabsprod57, vrounding), vshift);
|
D | 7x-minmax-sse2-c8-acc2.c | matches in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c8_acc2():
      58   const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse2.rounding);  (local)
      112  const __m128i vabsout02 = _mm_srl_epi64(_mm_add_epi64(vabsprod02, vrounding), vshift);
      113  const __m128i vabsout13 = _mm_srl_epi64(_mm_add_epi64(vabsprod13, vrounding), vshift);
      114  const __m128i vabsout46 = _mm_srl_epi64(_mm_add_epi64(vabsprod46, vrounding), vshift);
      115  const __m128i vabsout57 = _mm_srl_epi64(_mm_add_epi64(vabsprod57, vrounding), vshift);
      195  const __m128i vabsout02 = _mm_srl_epi64(_mm_add_epi64(vabsprod02, vrounding), vshift);
      196  const __m128i vabsout13 = _mm_srl_epi64(_mm_add_epi64(vabsprod13, vrounding), vshift);
      197  const __m128i vabsout46 = _mm_srl_epi64(_mm_add_epi64(vabsprod46, vrounding), vshift);
      198  const __m128i vabsout57 = _mm_srl_epi64(_mm_add_epi64(vabsprod57, vrounding), vshift);
|
D | 7p7x-minmax-wasmsimd-c16-acc2.c | matches in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c16_acc2():
      184  const v128_t vrounding = wasm_v128_load(params->wasmsimd.rounding);  (local)
      260  const v128_t vabsout01 = wasm_u64x2_shr(wasm_i64x2_add(vabsprod01, vrounding), vshift);
      261  const v128_t vabsout23 = wasm_u64x2_shr(wasm_i64x2_add(vabsprod23, vrounding), vshift);
      262  const v128_t vabsout45 = wasm_u64x2_shr(wasm_i64x2_add(vabsprod45, vrounding), vshift);
      263  const v128_t vabsout67 = wasm_u64x2_shr(wasm_i64x2_add(vabsprod67, vrounding), vshift);
      264  const v128_t vabsout89 = wasm_u64x2_shr(wasm_i64x2_add(vabsprod89, vrounding), vshift);
      265  const v128_t vabsoutAB = wasm_u64x2_shr(wasm_i64x2_add(vabsprodAB, vrounding), vshift);
      266  const v128_t vabsoutCD = wasm_u64x2_shr(wasm_i64x2_add(vabsprodCD, vrounding), vshift);
      267  const v128_t vabsoutEF = wasm_u64x2_shr(wasm_i64x2_add(vabsprodEF, vrounding), vshift);
      339  const v128_t vabsout01 = wasm_u64x2_shr(wasm_i64x2_add(vabsprod01, vrounding), vshift);
      [all …]
|
D | 7p7x-minmax-sse41-c16-acc2.c | matches in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c16_acc2():
      186  const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse2.rounding);  (local)
      253  const __m128i vabsout02 = _mm_srl_epi64(_mm_add_epi64(vabsprod02, vrounding), vshift);
      254  const __m128i vabsout13 = _mm_srl_epi64(_mm_add_epi64(vabsprod13, vrounding), vshift);
      255  const __m128i vabsout46 = _mm_srl_epi64(_mm_add_epi64(vabsprod46, vrounding), vshift);
      256  const __m128i vabsout57 = _mm_srl_epi64(_mm_add_epi64(vabsprod57, vrounding), vshift);
      257  const __m128i vabsout8A = _mm_srl_epi64(_mm_add_epi64(vabsprod8A, vrounding), vshift);
      258  const __m128i vabsout9B = _mm_srl_epi64(_mm_add_epi64(vabsprod9B, vrounding), vshift);
      259  const __m128i vabsoutCE = _mm_srl_epi64(_mm_add_epi64(vabsprodCE, vrounding), vshift);
      260  const __m128i vabsoutDF = _mm_srl_epi64(_mm_add_epi64(vabsprodDF, vrounding), vshift);
      331  const __m128i vabsout02 = _mm_srl_epi64(_mm_add_epi64(vabsprod02, vrounding), vshift);
      [all …]
|
D | 7p7x-minmax-wasmsimd-c24-acc2.c | matches in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c24_acc2():
      292  const v128_t vrounding = wasm_v128_load(params->wasmsimd.rounding);  (local)
      395  const v128_t vabsout01 = wasm_u64x2_shr(wasm_i64x2_add(vabsprod01, vrounding), vshift);
      396  const v128_t vabsout23 = wasm_u64x2_shr(wasm_i64x2_add(vabsprod23, vrounding), vshift);
      397  const v128_t vabsout45 = wasm_u64x2_shr(wasm_i64x2_add(vabsprod45, vrounding), vshift);
      398  const v128_t vabsout67 = wasm_u64x2_shr(wasm_i64x2_add(vabsprod67, vrounding), vshift);
      399  const v128_t vabsout89 = wasm_u64x2_shr(wasm_i64x2_add(vabsprod89, vrounding), vshift);
      400  const v128_t vabsoutAB = wasm_u64x2_shr(wasm_i64x2_add(vabsprodAB, vrounding), vshift);
      401  const v128_t vabsoutCD = wasm_u64x2_shr(wasm_i64x2_add(vabsprodCD, vrounding), vshift);
      402  const v128_t vabsoutEF = wasm_u64x2_shr(wasm_i64x2_add(vabsprodEF, vrounding), vshift);
      403  const v128_t vabsoutGH = wasm_u64x2_shr(wasm_i64x2_add(vabsprodGH, vrounding), vshift);
      [all …]
|
D | 7p7x-minmax-wasmsimd-c8-acc2.c | matches in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c8_acc2():
      150  const v128_t vrounding = wasm_v128_load(params->wasmsimd.rounding);  (local)
      199  const v128_t vabsout01 = wasm_u64x2_shr(wasm_i64x2_add(vabsprod01, vrounding), vshift);
      200  const v128_t vabsout23 = wasm_u64x2_shr(wasm_i64x2_add(vabsprod23, vrounding), vshift);
      201  const v128_t vabsout45 = wasm_u64x2_shr(wasm_i64x2_add(vabsprod45, vrounding), vshift);
      202  const v128_t vabsout67 = wasm_u64x2_shr(wasm_i64x2_add(vabsprod67, vrounding), vshift);
      269  const v128_t vabsout01 = wasm_u64x2_shr(wasm_i64x2_add(vabsprod01, vrounding), vshift);
      270  const v128_t vabsout23 = wasm_u64x2_shr(wasm_i64x2_add(vabsprod23, vrounding), vshift);
      271  const v128_t vabsout45 = wasm_u64x2_shr(wasm_i64x2_add(vabsprod45, vrounding), vshift);
      272  const v128_t vabsout67 = wasm_u64x2_shr(wasm_i64x2_add(vabsprod67, vrounding), vshift);
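
Note: the gavgpool kernels above apply the same rounding arithmetic to pooled sums: seven int8 rows are accumulated on top of a precomputed bias into 32-bit lanes, and the absolute sum is then multiplied, rounded with vrounding, and shifted, exactly as in the requantization kernels. A scalar model of one channel under those assumptions; the helper and parameter names are illustrative, not the microkernel signatures:

    #include <stddef.h>
    #include <stdint.h>

    /* Scalar model of one channel of a 7x gavgpool microkernel. */
    static inline int8_t gavgpool7_channel(
        const int8_t* rows[7], size_t c, int32_t bias,
        uint32_t multiplier, uint32_t shift, int8_t zero_point)
    {
      int32_t acc = bias;   /* bias is a precomputed per-op constant */
      for (int r = 0; r < 7; r++) {
        acc += (int32_t) rows[r][c];
      }
      const uint64_t rounding = UINT64_C(1) << (shift - 1);   /* vrounding */
      const uint32_t abs_acc = (uint32_t) (acc >= 0 ? acc : -acc);
      const uint32_t abs_out =
          (uint32_t) (((uint64_t) abs_acc * multiplier + rounding) >> shift);
      int32_t out = (acc >= 0 ? (int32_t) abs_out : -(int32_t) abs_out)
          + (int32_t) zero_point;
      if (out < -128) out = -128;   /* output_min */
      if (out > 127) out = 127;     /* output_max */
      return (int8_t) out;
    }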
|
/external/XNNPACK/src/qu8-gavgpool/ |
D | 7x-minmax-sse2-c8.c | matches in xnn_qu8_gavgpool_minmax_ukernel_7x__sse2_c8():
      58   const __m128i vrounding = _mm_load_si128((const __m128i*) params->sse2.rounding);  (local)
      104  const __m128i vabs_scaled_lo02 = _mm_srl_epi64(_mm_add_epi64(vabsmul_lo02, vrounding), vright_shift);
      105  const __m128i vabs_scaled_lo13 = _mm_srl_epi64(_mm_add_epi64(vabsmul_lo13, vrounding), vright_shift);
      106  const __m128i vabs_scaled_hi02 = _mm_srl_epi64(_mm_add_epi64(vabsmul_hi02, vrounding), vright_shift);
      107  const __m128i vabs_scaled_hi13 = _mm_srl_epi64(_mm_add_epi64(vabsmul_hi13, vrounding), vright_shift);
      173  const __m128i vabs_scaled_lo02 = _mm_srl_epi64(_mm_add_epi64(vabsmul_lo02, vrounding), vright_shift);
      174  const __m128i vabs_scaled_lo13 = _mm_srl_epi64(_mm_add_epi64(vabsmul_lo13, vrounding), vright_shift);
      175  const __m128i vabs_scaled_hi02 = _mm_srl_epi64(_mm_add_epi64(vabsmul_hi02, vrounding), vright_shift);
      176  const __m128i vabs_scaled_hi13 = _mm_srl_epi64(_mm_add_epi64(vabsmul_hi13, vrounding), vright_shift);
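
Note: the lo/hi and 02/13 suffixes above come from the SSE2 widening multiply: _mm_mul_epu32 only multiplies 32-bit lanes 0 and 2 into 64-bit products, so lanes 1 and 3 have to be rotated into even positions first. A sketch of that idiom, assuming vmultiplier broadcasts the same 32-bit multiplier to every lane; the helper name is illustrative:

    #include <emmintrin.h>   /* SSE2 */

    /* Widen four uint32 lanes into four 64-bit products across two vectors:
       prod02 holds lanes 0 and 2, prod13 holds lanes 1 and 3. */
    static inline void widening_mul_u32(
        __m128i v, __m128i vmultiplier, __m128i* prod02, __m128i* prod13)
    {
      *prod02 = _mm_mul_epu32(v, vmultiplier);                           /* lanes 0, 2 */
      const __m128i v13 = _mm_shuffle_epi32(v, _MM_SHUFFLE(3, 3, 1, 1)); /* rotate odd lanes */
      *prod13 = _mm_mul_epu32(v13, vmultiplier);                         /* lanes 1, 3 */
    }

Each product pair is then rounded and shifted with the vright_shift count exactly as in the lines above.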
|
/external/XNNPACK/src/qs8-dwconv/gen/ |
D | up24x9-minmax-wasmsimd-mul16.c | matches in xnn_qs8_dwconv_minmax_ukernel_up24x9__wasmsimd_mul16():
      285  const v128_t vrounding = wasm_v128_load(params->wasmsimd.rounding);  (local)
      286  const v128_t vprod01 = wasm_i64x2_add(wasm_i64x2_mul(vacc01, vmultiplier), vrounding);
      287  const v128_t vprod23 = wasm_i64x2_add(wasm_i64x2_mul(vacc23, vmultiplier), vrounding);
      288  const v128_t vprod45 = wasm_i64x2_add(wasm_i64x2_mul(vacc45, vmultiplier), vrounding);
      289  const v128_t vprod67 = wasm_i64x2_add(wasm_i64x2_mul(vacc67, vmultiplier), vrounding);
      290  const v128_t vprod89 = wasm_i64x2_add(wasm_i64x2_mul(vacc89, vmultiplier), vrounding);
      291  const v128_t vprodAB = wasm_i64x2_add(wasm_i64x2_mul(vaccAB, vmultiplier), vrounding);
      292  const v128_t vprodCD = wasm_i64x2_add(wasm_i64x2_mul(vaccCD, vmultiplier), vrounding);
      293  const v128_t vprodEF = wasm_i64x2_add(wasm_i64x2_mul(vaccEF, vmultiplier), vrounding);
      294  const v128_t vprodGH = wasm_i64x2_add(wasm_i64x2_mul(vaccGH, vmultiplier), vrounding);
      [all …]
|
D | up16x9-minmax-wasmsimd-mul16.c | matches in xnn_qs8_dwconv_minmax_ukernel_up16x9__wasmsimd_mul16():
      232  const v128_t vrounding = wasm_v128_load(params->wasmsimd.rounding);  (local)
      233  const v128_t vprod01 = wasm_i64x2_add(wasm_i64x2_mul(vacc01, vmultiplier), vrounding);
      234  const v128_t vprod23 = wasm_i64x2_add(wasm_i64x2_mul(vacc23, vmultiplier), vrounding);
      235  const v128_t vprod45 = wasm_i64x2_add(wasm_i64x2_mul(vacc45, vmultiplier), vrounding);
      236  const v128_t vprod67 = wasm_i64x2_add(wasm_i64x2_mul(vacc67, vmultiplier), vrounding);
      237  const v128_t vprod89 = wasm_i64x2_add(wasm_i64x2_mul(vacc89, vmultiplier), vrounding);
      238  const v128_t vprodAB = wasm_i64x2_add(wasm_i64x2_mul(vaccAB, vmultiplier), vrounding);
      239  const v128_t vprodCD = wasm_i64x2_add(wasm_i64x2_mul(vaccCD, vmultiplier), vrounding);
      240  const v128_t vprodEF = wasm_i64x2_add(wasm_i64x2_mul(vaccEF, vmultiplier), vrounding);
      371  const v128_t vrounding = wasm_v128_load(params->wasmsimd.rounding);  (local)
      [all …]
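
Note: unlike the SSE kernels, the wasmsimd mul16 dwconv kernels quoted here multiply the signed accumulators directly, with no abs/sign-restore step: each i32x4 accumulator half is sign-extended into i64x2 lanes, multiplied by the broadcast multiplier with wasm_i64x2_mul, offset by vrounding, and finally shifted arithmetically. A sketch of one half under that assumed widening scheme; the helper name and the explicit shift step are illustrative, since the quoted lines show only the multiply-and-add:

    #include <stdint.h>
    #include <wasm_simd128.h>

    /* Requantize the low two 32-bit accumulator lanes as signed 64-bit values. */
    static inline v128_t requantize_low_i64x2(
        v128_t vacc, v128_t vmultiplier, v128_t vrounding, uint32_t shift)
    {
      const v128_t vsign = wasm_i32x4_shr(vacc, 31);                      /* sign words */
      const v128_t vacc01 = wasm_v32x4_shuffle(vacc, vsign, 0, 4, 1, 5);  /* sign-extend lanes 0, 1 */
      const v128_t vprod01 =
          wasm_i64x2_add(wasm_i64x2_mul(vacc01, vmultiplier), vrounding);
      return wasm_i64x2_shr(vprod01, shift);                              /* arithmetic >> shift */
    }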
|