/external/XNNPACK/src/qs8-gavgpool/gen/ |
D | 7x-minmax-wasmsimd-c8-acc2.c | 90 const v128_t vabsacc0123 = wasm_i32x4_abs(vacc0123); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c8_acc2() local 93 const v128_t vsgnacc0123 = wasm_i32x4_gt(vabsacc0123, vacc0123); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c8_acc2() 96 const v128_t vabsacc01 = wasm_v32x4_shuffle(vabsacc0123, vzero, 0, 4, 1, 5); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c8_acc2() 97 const v128_t vabsacc23 = wasm_v32x4_shuffle(vabsacc0123, vzero, 2, 6, 3, 7); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c8_acc2() 159 const v128_t vabsacc0123 = wasm_i32x4_abs(vacc0123); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c8_acc2() local 162 const v128_t vsgnacc0123 = wasm_i32x4_gt(vabsacc0123, vacc0123); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c8_acc2() 165 const v128_t vabsacc01 = wasm_v32x4_shuffle(vabsacc0123, vzero, 0, 4, 1, 5); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c8_acc2() 166 const v128_t vabsacc23 = wasm_v32x4_shuffle(vabsacc0123, vzero, 2, 6, 3, 7); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c8_acc2()
|
D | 7x-minmax-wasmsimd-c16-acc2.c | 105 const v128_t vabsacc0123 = wasm_i32x4_abs(vacc0123); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c16_acc2() local 110 const v128_t vsgnacc0123 = wasm_i32x4_gt(vabsacc0123, vacc0123); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c16_acc2() 115 const v128_t vabsacc01 = wasm_v32x4_shuffle(vabsacc0123, vzero, 0, 4, 1, 5); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c16_acc2() 116 const v128_t vabsacc23 = wasm_v32x4_shuffle(vabsacc0123, vzero, 2, 6, 3, 7); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c16_acc2() 195 const v128_t vabsacc0123 = wasm_i32x4_abs(vacc0123); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c16_acc2() local 198 const v128_t vsgnacc0123 = wasm_i32x4_gt(vabsacc0123, vacc0123); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c16_acc2() 201 const v128_t vabsacc01 = wasm_v32x4_shuffle(vabsacc0123, vzero, 0, 4, 1, 5); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c16_acc2() 202 const v128_t vabsacc23 = wasm_v32x4_shuffle(vabsacc0123, vzero, 2, 6, 3, 7); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c16_acc2()
|
D | 7x-minmax-sse41-c8-acc2.c | 90 const __m128i vabsacc0123 = _mm_abs_epi32(vacc0123); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c8_acc2() local 93 const __m128i vabsacc13 = _mm_shuffle_epi32(vabsacc0123, _MM_SHUFFLE(3, 3, 1, 1)); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c8_acc2() 96 const __m128i vabsprod02 = _mm_mul_epu32(vabsacc0123, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c8_acc2() 157 const __m128i vabsacc0123 = _mm_abs_epi32(vacc0123); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c8_acc2() local 160 const __m128i vabsacc13 = _mm_shuffle_epi32(vabsacc0123, _MM_SHUFFLE(3, 3, 1, 1)); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c8_acc2() 163 const __m128i vabsprod02 = _mm_mul_epu32(vabsacc0123, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c8_acc2()
|
D | 7p7x-minmax-wasmsimd-c8-acc2.c | 183 const v128_t vabsacc0123 = wasm_i32x4_abs(vacc0123); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c8_acc2() local 186 const v128_t vsgnacc0123 = wasm_i32x4_gt(vabsacc0123, vacc0123); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c8_acc2() 189 const v128_t vabsacc01 = wasm_v32x4_shuffle(vabsacc0123, vzero, 0, 4, 1, 5); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c8_acc2() 190 const v128_t vabsacc23 = wasm_v32x4_shuffle(vabsacc0123, vzero, 2, 6, 3, 7); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c8_acc2() 253 const v128_t vabsacc0123 = wasm_i32x4_abs(vacc0123); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c8_acc2() local 256 const v128_t vsgnacc0123 = wasm_i32x4_gt(vabsacc0123, vacc0123); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c8_acc2() 259 const v128_t vabsacc01 = wasm_v32x4_shuffle(vabsacc0123, vzero, 0, 4, 1, 5); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c8_acc2() 260 const v128_t vabsacc23 = wasm_v32x4_shuffle(vabsacc0123, vzero, 2, 6, 3, 7); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c8_acc2()
|
D | 7x-minmax-wasmsimd-c24-acc2.c | 120 const v128_t vabsacc0123 = wasm_i32x4_abs(vacc0123); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c24_acc2() local 127 const v128_t vsgnacc0123 = wasm_i32x4_gt(vabsacc0123, vacc0123); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c24_acc2() 134 const v128_t vabsacc01 = wasm_v32x4_shuffle(vabsacc0123, vzero, 0, 4, 1, 5); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c24_acc2() 135 const v128_t vabsacc23 = wasm_v32x4_shuffle(vabsacc0123, vzero, 2, 6, 3, 7); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c24_acc2() 233 const v128_t vabsacc0123 = wasm_i32x4_abs(vacc0123); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c24_acc2() local 236 const v128_t vsgnacc0123 = wasm_i32x4_gt(vabsacc0123, vacc0123); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c24_acc2() 239 const v128_t vabsacc01 = wasm_v32x4_shuffle(vabsacc0123, vzero, 0, 4, 1, 5); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c24_acc2() 240 const v128_t vabsacc23 = wasm_v32x4_shuffle(vabsacc0123, vzero, 2, 6, 3, 7); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c24_acc2()
|
D | 7x-minmax-ssse3-c8-acc2.c | 98 const __m128i vabsacc0123 = _mm_abs_epi32(vacc0123); in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c8_acc2() local 101 const __m128i vabsacc13 = _mm_shuffle_epi32(vabsacc0123, _MM_SHUFFLE(3, 3, 1, 1)); in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c8_acc2() 104 const __m128i vabsprod02 = _mm_mul_epu32(vabsacc0123, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c8_acc2() 178 const __m128i vabsacc0123 = _mm_abs_epi32(vacc0123); in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c8_acc2() local 181 const __m128i vabsacc13 = _mm_shuffle_epi32(vabsacc0123, _MM_SHUFFLE(3, 3, 1, 1)); in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c8_acc2() 184 const __m128i vabsprod02 = _mm_mul_epu32(vabsacc0123, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c8_acc2()
|
D | 7x-minmax-sse2-c8-acc2.c | 101 const __m128i vabsacc0123 = _mm_sub_epi32(_mm_xor_si128(vacc0123, vsgnacc0123), vsgnacc0123); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c8_acc2() local 104 const __m128i vabsacc13 = _mm_shuffle_epi32(vabsacc0123, _MM_SHUFFLE(3, 3, 1, 1)); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c8_acc2() 107 const __m128i vabsprod02 = _mm_mul_epu32(vabsacc0123, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c8_acc2() 184 const __m128i vabsacc0123 = _mm_sub_epi32(_mm_xor_si128(vacc0123, vsgnacc0123), vsgnacc0123); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c8_acc2() local 187 const __m128i vabsacc13 = _mm_shuffle_epi32(vabsacc0123, _MM_SHUFFLE(3, 3, 1, 1)); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c8_acc2() 190 const __m128i vabsprod02 = _mm_mul_epu32(vabsacc0123, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c8_acc2()
|
D | 7x-minmax-sse41-c16-acc2.c | 105 const __m128i vabsacc0123 = _mm_abs_epi32(vacc0123); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c16_acc2() local 110 const __m128i vabsacc13 = _mm_shuffle_epi32(vabsacc0123, _MM_SHUFFLE(3, 3, 1, 1)); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c16_acc2() 115 const __m128i vabsprod02 = _mm_mul_epu32(vabsacc0123, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c16_acc2() 190 const __m128i vabsacc0123 = _mm_abs_epi32(vacc0123); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c16_acc2() local 193 const __m128i vabsacc13 = _mm_shuffle_epi32(vabsacc0123, _MM_SHUFFLE(3, 3, 1, 1)); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c16_acc2() 196 const __m128i vabsprod02 = _mm_mul_epu32(vabsacc0123, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c16_acc2()
|
D | 7p7x-minmax-wasmsimd-c16-acc2.c | 232 const v128_t vabsacc0123 = wasm_i32x4_abs(vacc0123); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c16_acc2() local 237 const v128_t vsgnacc0123 = wasm_i32x4_gt(vabsacc0123, vacc0123); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c16_acc2() 242 const v128_t vabsacc01 = wasm_v32x4_shuffle(vabsacc0123, vzero, 0, 4, 1, 5); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c16_acc2() 243 const v128_t vabsacc23 = wasm_v32x4_shuffle(vabsacc0123, vzero, 2, 6, 3, 7); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c16_acc2() 323 const v128_t vabsacc0123 = wasm_i32x4_abs(vacc0123); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c16_acc2() local 326 const v128_t vsgnacc0123 = wasm_i32x4_gt(vabsacc0123, vacc0123); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c16_acc2() 329 const v128_t vabsacc01 = wasm_v32x4_shuffle(vabsacc0123, vzero, 0, 4, 1, 5); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c16_acc2() 330 const v128_t vabsacc23 = wasm_v32x4_shuffle(vabsacc0123, vzero, 2, 6, 3, 7); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c16_acc2()
|
D | 7p7x-minmax-sse41-c8-acc2.c | 185 const __m128i vabsacc0123 = _mm_abs_epi32(vacc0123); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c8_acc2() local 188 const __m128i vabsacc13 = _mm_shuffle_epi32(vabsacc0123, _MM_SHUFFLE(3, 3, 1, 1)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c8_acc2() 191 const __m128i vabsprod02 = _mm_mul_epu32(vabsacc0123, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c8_acc2() 253 const __m128i vabsacc0123 = _mm_abs_epi32(vacc0123); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c8_acc2() local 256 const __m128i vabsacc13 = _mm_shuffle_epi32(vabsacc0123, _MM_SHUFFLE(3, 3, 1, 1)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c8_acc2() 259 const __m128i vabsprod02 = _mm_mul_epu32(vabsacc0123, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c8_acc2()
|
D | 7x-minmax-sse41-c24-acc2.c | 120 const __m128i vabsacc0123 = _mm_abs_epi32(vacc0123); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c24_acc2() local 127 const __m128i vabsacc13 = _mm_shuffle_epi32(vabsacc0123, _MM_SHUFFLE(3, 3, 1, 1)); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c24_acc2() 134 const __m128i vabsprod02 = _mm_mul_epu32(vabsacc0123, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c24_acc2() 225 const __m128i vabsacc0123 = _mm_abs_epi32(vacc0123); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c24_acc2() local 228 const __m128i vabsacc13 = _mm_shuffle_epi32(vabsacc0123, _MM_SHUFFLE(3, 3, 1, 1)); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c24_acc2() 231 const __m128i vabsprod02 = _mm_mul_epu32(vabsacc0123, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c24_acc2()
|
D | 7x-minmax-ssse3-c16-acc2.c | 121 const __m128i vabsacc0123 = _mm_abs_epi32(vacc0123); in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c16_acc2() local 126 const __m128i vabsacc13 = _mm_shuffle_epi32(vabsacc0123, _MM_SHUFFLE(3, 3, 1, 1)); in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c16_acc2() 131 const __m128i vabsprod02 = _mm_mul_epu32(vabsacc0123, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c16_acc2() 223 const __m128i vabsacc0123 = _mm_abs_epi32(vacc0123); in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c16_acc2() local 226 const __m128i vabsacc13 = _mm_shuffle_epi32(vabsacc0123, _MM_SHUFFLE(3, 3, 1, 1)); in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c16_acc2() 229 const __m128i vabsprod02 = _mm_mul_epu32(vabsacc0123, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c16_acc2()
|
D | 7x-minmax-sse2-c16-acc2.c | 126 const __m128i vabsacc0123 = _mm_sub_epi32(_mm_xor_si128(vacc0123, vsgnacc0123), vsgnacc0123); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c16_acc2() local 131 const __m128i vabsacc13 = _mm_shuffle_epi32(vabsacc0123, _MM_SHUFFLE(3, 3, 1, 1)); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c16_acc2() 136 const __m128i vabsprod02 = _mm_mul_epu32(vabsacc0123, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c16_acc2() 231 const __m128i vabsacc0123 = _mm_sub_epi32(_mm_xor_si128(vacc0123, vsgnacc0123), vsgnacc0123); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c16_acc2() local 234 const __m128i vabsacc13 = _mm_shuffle_epi32(vabsacc0123, _MM_SHUFFLE(3, 3, 1, 1)); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c16_acc2() 237 const __m128i vabsprod02 = _mm_mul_epu32(vabsacc0123, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c16_acc2()
|
D | 7p7x-minmax-sse2-c8-acc2.c | 212 const __m128i vabsacc0123 = _mm_sub_epi32(_mm_xor_si128(vacc0123, vsgnacc0123), vsgnacc0123); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c8_acc2() local 215 const __m128i vabsacc13 = _mm_shuffle_epi32(vabsacc0123, _MM_SHUFFLE(3, 3, 1, 1)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c8_acc2() 218 const __m128i vabsprod02 = _mm_mul_epu32(vabsacc0123, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c8_acc2() 296 const __m128i vabsacc0123 = _mm_sub_epi32(_mm_xor_si128(vacc0123, vsgnacc0123), vsgnacc0123); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c8_acc2() local 299 const __m128i vabsacc13 = _mm_shuffle_epi32(vabsacc0123, _MM_SHUFFLE(3, 3, 1, 1)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c8_acc2() 302 const __m128i vabsprod02 = _mm_mul_epu32(vabsacc0123, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c8_acc2()
|
D | 7p7x-minmax-ssse3-c8-acc2.c | 209 const __m128i vabsacc0123 = _mm_abs_epi32(vacc0123); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c8_acc2() local 212 const __m128i vabsacc13 = _mm_shuffle_epi32(vabsacc0123, _MM_SHUFFLE(3, 3, 1, 1)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c8_acc2() 215 const __m128i vabsprod02 = _mm_mul_epu32(vabsacc0123, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c8_acc2() 290 const __m128i vabsacc0123 = _mm_abs_epi32(vacc0123); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c8_acc2() local 293 const __m128i vabsacc13 = _mm_shuffle_epi32(vabsacc0123, _MM_SHUFFLE(3, 3, 1, 1)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c8_acc2() 296 const __m128i vabsprod02 = _mm_mul_epu32(vabsacc0123, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c8_acc2()
|
D | 7x-minmax-ssse3-c24-acc2.c | 144 const __m128i vabsacc0123 = _mm_abs_epi32(vacc0123); in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c24_acc2() local 151 const __m128i vabsacc13 = _mm_shuffle_epi32(vabsacc0123, _MM_SHUFFLE(3, 3, 1, 1)); in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c24_acc2() 158 const __m128i vabsprod02 = _mm_mul_epu32(vabsacc0123, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c24_acc2() 270 const __m128i vabsacc0123 = _mm_abs_epi32(vacc0123); in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c24_acc2() local 273 const __m128i vabsacc13 = _mm_shuffle_epi32(vabsacc0123, _MM_SHUFFLE(3, 3, 1, 1)); in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c24_acc2() 276 const __m128i vabsprod02 = _mm_mul_epu32(vabsacc0123, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c24_acc2()
|
D | 7x-minmax-sse2-c24-acc2.c | 151 const __m128i vabsacc0123 = _mm_sub_epi32(_mm_xor_si128(vacc0123, vsgnacc0123), vsgnacc0123); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c24_acc2() local 158 const __m128i vabsacc13 = _mm_shuffle_epi32(vabsacc0123, _MM_SHUFFLE(3, 3, 1, 1)); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c24_acc2() 165 const __m128i vabsprod02 = _mm_mul_epu32(vabsacc0123, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c24_acc2() 280 const __m128i vabsacc0123 = _mm_sub_epi32(_mm_xor_si128(vacc0123, vsgnacc0123), vsgnacc0123); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c24_acc2() local 283 const __m128i vabsacc13 = _mm_shuffle_epi32(vabsacc0123, _MM_SHUFFLE(3, 3, 1, 1)); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c24_acc2() 286 const __m128i vabsprod02 = _mm_mul_epu32(vabsacc0123, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c24_acc2()
|
D | 7p7x-minmax-sse41-c16-acc2.c | 234 const __m128i vabsacc0123 = _mm_abs_epi32(vacc0123); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c16_acc2() local 239 const __m128i vabsacc13 = _mm_shuffle_epi32(vabsacc0123, _MM_SHUFFLE(3, 3, 1, 1)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c16_acc2() 244 const __m128i vabsprod02 = _mm_mul_epu32(vabsacc0123, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c16_acc2() 320 const __m128i vabsacc0123 = _mm_abs_epi32(vacc0123); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c16_acc2() local 323 const __m128i vabsacc13 = _mm_shuffle_epi32(vabsacc0123, _MM_SHUFFLE(3, 3, 1, 1)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c16_acc2() 326 const __m128i vabsprod02 = _mm_mul_epu32(vabsacc0123, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c16_acc2()
|
D | 7p7x-minmax-wasmsimd-c24-acc2.c | 355 const v128_t vabsacc0123 = wasm_i32x4_abs(vacc0123); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c24_acc2() local 362 const v128_t vsgnacc0123 = wasm_i32x4_gt(vabsacc0123, vacc0123); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c24_acc2() 369 const v128_t vabsacc01 = wasm_v32x4_shuffle(vabsacc0123, vzero, 0, 4, 1, 5); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c24_acc2() 370 const v128_t vabsacc23 = wasm_v32x4_shuffle(vabsacc0123, vzero, 2, 6, 3, 7); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c24_acc2() 469 const v128_t vabsacc0123 = wasm_i32x4_abs(vacc0123); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c24_acc2() local 472 const v128_t vsgnacc0123 = wasm_i32x4_gt(vabsacc0123, vacc0123); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c24_acc2() 475 const v128_t vabsacc01 = wasm_v32x4_shuffle(vabsacc0123, vzero, 0, 4, 1, 5); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c24_acc2() 476 const v128_t vabsacc23 = wasm_v32x4_shuffle(vabsacc0123, vzero, 2, 6, 3, 7); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__wasmsimd_c24_acc2()
|
D | 7p7x-minmax-ssse3-c16-acc2.c | 282 const __m128i vabsacc0123 = _mm_abs_epi32(vacc0123); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c16_acc2() local 287 const __m128i vabsacc13 = _mm_shuffle_epi32(vabsacc0123, _MM_SHUFFLE(3, 3, 1, 1)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c16_acc2() 292 const __m128i vabsprod02 = _mm_mul_epu32(vabsacc0123, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c16_acc2() 385 const __m128i vabsacc0123 = _mm_abs_epi32(vacc0123); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c16_acc2() local 388 const __m128i vabsacc13 = _mm_shuffle_epi32(vabsacc0123, _MM_SHUFFLE(3, 3, 1, 1)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c16_acc2() 391 const __m128i vabsprod02 = _mm_mul_epu32(vabsacc0123, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c16_acc2()
|
D | 7p7x-minmax-sse2-c16-acc2.c | 287 const __m128i vabsacc0123 = _mm_sub_epi32(_mm_xor_si128(vacc0123, vsgnacc0123), vsgnacc0123); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c16_acc2() local 292 const __m128i vabsacc13 = _mm_shuffle_epi32(vabsacc0123, _MM_SHUFFLE(3, 3, 1, 1)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c16_acc2() 297 const __m128i vabsprod02 = _mm_mul_epu32(vabsacc0123, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c16_acc2() 393 const __m128i vabsacc0123 = _mm_sub_epi32(_mm_xor_si128(vacc0123, vsgnacc0123), vsgnacc0123); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c16_acc2() local 396 const __m128i vabsacc13 = _mm_shuffle_epi32(vabsacc0123, _MM_SHUFFLE(3, 3, 1, 1)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c16_acc2() 399 const __m128i vabsprod02 = _mm_mul_epu32(vabsacc0123, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c16_acc2()
|
D | 7p7x-minmax-sse41-c24-acc2.c | 359 const __m128i vabsacc0123 = _mm_abs_epi32(vacc0123); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c24_acc2() local 366 const __m128i vabsacc13 = _mm_shuffle_epi32(vabsacc0123, _MM_SHUFFLE(3, 3, 1, 1)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c24_acc2() 373 const __m128i vabsprod02 = _mm_mul_epu32(vabsacc0123, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c24_acc2() 465 const __m128i vabsacc0123 = _mm_abs_epi32(vacc0123); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c24_acc2() local 468 const __m128i vabsacc13 = _mm_shuffle_epi32(vabsacc0123, _MM_SHUFFLE(3, 3, 1, 1)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c24_acc2() 471 const __m128i vabsprod02 = _mm_mul_epu32(vabsacc0123, vmultiplier); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c24_acc2()
|
/external/XNNPACK/src/qs8-dwconv/gen/ |
D | up8x9-minmax-sse2-mul16.c | 212 const __m128i vabsacc0123 = _mm_sub_epi32(_mm_xor_si128(vacc0123, vnmask0123), vnmask0123); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16() local 215 const __m128i vabsacc13 = _mm_shuffle_epi32(vabsacc0123, _MM_SHUFFLE(3, 3, 1, 1)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16() 216 const __m128i vabsprod02 = _mm_mul_epu32(vabsacc0123, vmultiplier); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16() 391 const __m128i vabsacc0123 = _mm_sub_epi32(_mm_xor_si128(vacc0123, vnmask0123), vnmask0123); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16() local 394 const __m128i vabsacc13 = _mm_shuffle_epi32(vabsacc0123, _MM_SHUFFLE(3, 3, 1, 1)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16() 397 const __m128i vabsprod02 = _mm_mul_epu32(vabsacc0123, vmultiplier); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse2_mul16()
|
D | up8x9-minmax-ssse3-mul16.c | 212 const __m128i vabsacc0123 = _mm_abs_epi32(vacc0123); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16() local 215 const __m128i vabsacc13 = _mm_shuffle_epi32(vabsacc0123, _MM_SHUFFLE(3, 3, 1, 1)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16() 216 const __m128i vabsprod02 = _mm_mul_epu32(vabsacc0123, vmultiplier); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16() 391 const __m128i vabsacc0123 = _mm_abs_epi32(vacc0123); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16() local 394 const __m128i vabsacc13 = _mm_shuffle_epi32(vabsacc0123, _MM_SHUFFLE(3, 3, 1, 1)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16() 397 const __m128i vabsprod02 = _mm_mul_epu32(vabsacc0123, vmultiplier); in xnn_qs8_dwconv_minmax_ukernel_up8x9__ssse3_mul16()
|
D | up16x9-minmax-ssse3-mul16.c | 288 const __m128i vabsacc0123 = _mm_abs_epi32(vacc0123); in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16() local 293 const __m128i vabsacc13 = _mm_shuffle_epi32(vabsacc0123, _MM_SHUFFLE(3, 3, 1, 1)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16() 294 const __m128i vabsprod02 = _mm_mul_epu32(vabsacc0123, vmultiplier); in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16() 515 const __m128i vabsacc0123 = _mm_abs_epi32(vacc0123); in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16() local 518 const __m128i vabsacc13 = _mm_shuffle_epi32(vabsacc0123, _MM_SHUFFLE(3, 3, 1, 1)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16() 521 const __m128i vabsprod02 = _mm_mul_epu32(vabsacc0123, vmultiplier); in xnn_qs8_dwconv_minmax_ukernel_up16x9__ssse3_mul16()
|