/external/XNNPACK/src/f16-gavgpool/gen/ |
D | 7p7x-minmax-neonfp16arith-c32.c | 59 float16x8_t vaccOPQRSTUV = vaddq_f16(vi0xOPQRSTUV, vi1xOPQRSTUV); in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c32() local 68 vaccOPQRSTUV = vaddq_f16(vaccOPQRSTUV, vi2xOPQRSTUV); in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c32() 76 vaccOPQRSTUV = vaddq_f16(vaccOPQRSTUV, vi3xOPQRSTUV); in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c32() 84 vaccOPQRSTUV = vaddq_f16(vaccOPQRSTUV, vi4xOPQRSTUV); in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c32() 92 vaccOPQRSTUV = vaddq_f16(vaccOPQRSTUV, vi5xOPQRSTUV); in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c32() 96 vaccOPQRSTUV = vaddq_f16(vaccOPQRSTUV, vi6xOPQRSTUV); in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c32() 101 vst1q_f16(b, vaccOPQRSTUV); b += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c32() 141 float16x8_t vaccOPQRSTUV = vld1q_f16(b + 24); in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c32() local 155 vaccOPQRSTUV = vaddq_f16(vaccOPQRSTUV, vi0xOPQRSTUV); in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c32() 163 vaccOPQRSTUV = vaddq_f16(vaccOPQRSTUV, vi1xOPQRSTUV); in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c32() [all …]
|
D | 7x-minmax-neonfp16arith-c32.c | 76 float16x8_t vaccOPQRSTUV = vaddq_f16(vi0xOPQRSTUV, vi1xOPQRSTUV); in xnn_f16_gavgpool_minmax_ukernel_7x__neonfp16arith_c32() local 85 vaccOPQRSTUV = vaddq_f16(vaccOPQRSTUV, vi2xOPQRSTUV); in xnn_f16_gavgpool_minmax_ukernel_7x__neonfp16arith_c32() 93 vaccOPQRSTUV = vaddq_f16(vaccOPQRSTUV, vi3xOPQRSTUV); in xnn_f16_gavgpool_minmax_ukernel_7x__neonfp16arith_c32() 101 vaccOPQRSTUV = vaddq_f16(vaccOPQRSTUV, vi4xOPQRSTUV); in xnn_f16_gavgpool_minmax_ukernel_7x__neonfp16arith_c32() 109 vaccOPQRSTUV = vaddq_f16(vaccOPQRSTUV, vi5xOPQRSTUV); in xnn_f16_gavgpool_minmax_ukernel_7x__neonfp16arith_c32() 113 vaccOPQRSTUV = vaddq_f16(vaccOPQRSTUV, vi6xOPQRSTUV); in xnn_f16_gavgpool_minmax_ukernel_7x__neonfp16arith_c32() 118 vaccOPQRSTUV = vmulq_f16(vaccOPQRSTUV, vscale); in xnn_f16_gavgpool_minmax_ukernel_7x__neonfp16arith_c32() 123 vaccOPQRSTUV = vmaxq_f16(vaccOPQRSTUV, vmin); in xnn_f16_gavgpool_minmax_ukernel_7x__neonfp16arith_c32() 128 vaccOPQRSTUV = vminq_f16(vaccOPQRSTUV, vmax); in xnn_f16_gavgpool_minmax_ukernel_7x__neonfp16arith_c32() 133 vst1q_f16(output, vaccOPQRSTUV); output = (__fp16*) output + 8; in xnn_f16_gavgpool_minmax_ukernel_7x__neonfp16arith_c32()
|
D | 7p7x-minmax-f16c-c32.c | 60 …__m128i vaccOPQRSTUV = _mm256_cvtps_ph(_mm256_add_ps(vi0xOPQRSTUV, vi1xOPQRSTUV), _MM_FROUND_NO_EX… in xnn_f16_gavgpool_minmax_ukernel_7p7x__f16c_c32() local 69 …vaccOPQRSTUV = _mm256_cvtps_ph(_mm256_add_ps(_mm256_cvtph_ps(vaccOPQRSTUV), vi2xOPQRSTUV), _MM_FRO… in xnn_f16_gavgpool_minmax_ukernel_7p7x__f16c_c32() 77 …vaccOPQRSTUV = _mm256_cvtps_ph(_mm256_add_ps(_mm256_cvtph_ps(vaccOPQRSTUV), vi3xOPQRSTUV), _MM_FRO… in xnn_f16_gavgpool_minmax_ukernel_7p7x__f16c_c32() 85 …vaccOPQRSTUV = _mm256_cvtps_ph(_mm256_add_ps(_mm256_cvtph_ps(vaccOPQRSTUV), vi4xOPQRSTUV), _MM_FRO… in xnn_f16_gavgpool_minmax_ukernel_7p7x__f16c_c32() 93 …vaccOPQRSTUV = _mm256_cvtps_ph(_mm256_add_ps(_mm256_cvtph_ps(vaccOPQRSTUV), vi5xOPQRSTUV), _MM_FRO… in xnn_f16_gavgpool_minmax_ukernel_7p7x__f16c_c32() 97 …vaccOPQRSTUV = _mm256_cvtps_ph(_mm256_add_ps(_mm256_cvtph_ps(vaccOPQRSTUV), vi6xOPQRSTUV), _MM_FRO… in xnn_f16_gavgpool_minmax_ukernel_7p7x__f16c_c32() 102 _mm_store_si128((__m128i*) b, vaccOPQRSTUV); b += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__f16c_c32() 142 __m128i vaccOPQRSTUV = _mm_loadu_si128((const __m128i*) (b + 24)); in xnn_f16_gavgpool_minmax_ukernel_7p7x__f16c_c32() local 156 …vaccOPQRSTUV = _mm256_cvtps_ph(_mm256_add_ps(_mm256_cvtph_ps(vaccOPQRSTUV), vi0xOPQRSTUV), _MM_FRO… in xnn_f16_gavgpool_minmax_ukernel_7p7x__f16c_c32() 164 …vaccOPQRSTUV = _mm256_cvtps_ph(_mm256_add_ps(_mm256_cvtph_ps(vaccOPQRSTUV), vi1xOPQRSTUV), _MM_FRO… in xnn_f16_gavgpool_minmax_ukernel_7p7x__f16c_c32() [all …]
|
D | 7x-minmax-f16c-c32.c | 80 …__m128i vaccOPQRSTUV = _mm256_cvtps_ph(_mm256_add_ps(vi0xOPQRSTUV, vi1xOPQRSTUV), _MM_FROUND_NO_EX… in xnn_f16_gavgpool_minmax_ukernel_7x__f16c_c32() local 91 …vaccOPQRSTUV = _mm256_cvtps_ph(_mm256_add_ps(_mm256_cvtph_ps(vaccOPQRSTUV), vi2xOPQRSTUV), _MM_FRO… in xnn_f16_gavgpool_minmax_ukernel_7x__f16c_c32() 100 …vaccOPQRSTUV = _mm256_cvtps_ph(_mm256_add_ps(_mm256_cvtph_ps(vaccOPQRSTUV), vi3xOPQRSTUV), _MM_FRO… in xnn_f16_gavgpool_minmax_ukernel_7x__f16c_c32() 109 …vaccOPQRSTUV = _mm256_cvtps_ph(_mm256_add_ps(_mm256_cvtph_ps(vaccOPQRSTUV), vi4xOPQRSTUV), _MM_FRO… in xnn_f16_gavgpool_minmax_ukernel_7x__f16c_c32() 118 …vaccOPQRSTUV = _mm256_cvtps_ph(_mm256_add_ps(_mm256_cvtph_ps(vaccOPQRSTUV), vi5xOPQRSTUV), _MM_FRO… in xnn_f16_gavgpool_minmax_ukernel_7x__f16c_c32() 122 …vaccOPQRSTUV = _mm256_cvtps_ph(_mm256_add_ps(_mm256_cvtph_ps(vaccOPQRSTUV), vi6xOPQRSTUV), _MM_FRO… in xnn_f16_gavgpool_minmax_ukernel_7x__f16c_c32() 127 …vaccOPQRSTUV = _mm256_cvtps_ph(_mm256_mul_ps(_mm256_cvtph_ps(vaccOPQRSTUV), vscale), _MM_FROUND_NO… in xnn_f16_gavgpool_minmax_ukernel_7x__f16c_c32() 132 __m256 voutOPQRSTUV = _mm256_max_ps(_mm256_cvtph_ps(vaccOPQRSTUV), vmin); in xnn_f16_gavgpool_minmax_ukernel_7x__f16c_c32()
|
/external/XNNPACK/src/qs8-gavgpool/gen/ |
D | 7p7x-minmax-fp32-wasmsimd-c32.c | 61 v128_t vaccOPQRSTUV = wasm_i16x8_add(vxi0xOPQRSTUV, vxi1xOPQRSTUV); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__wasmsimd_c32() local 71 vaccOPQRSTUV = wasm_i16x8_add(vaccOPQRSTUV, vxi2xOPQRSTUV); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__wasmsimd_c32() 80 vaccOPQRSTUV = wasm_i16x8_add(vaccOPQRSTUV, vxi3xOPQRSTUV); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__wasmsimd_c32() 89 vaccOPQRSTUV = wasm_i16x8_add(vaccOPQRSTUV, vxi4xOPQRSTUV); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__wasmsimd_c32() 98 vaccOPQRSTUV = wasm_i16x8_add(vaccOPQRSTUV, vxi5xOPQRSTUV); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__wasmsimd_c32() 105 vaccOPQRSTUV = wasm_i16x8_add(vaccOPQRSTUV, vxi6xOPQRSTUV); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__wasmsimd_c32() 113 const v128_t vaccOPQR = wasm_i32x4_add(vinit_bias, wasm_i32x4_extend_low_i16x8(vaccOPQRSTUV)); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__wasmsimd_c32() 114 const v128_t vaccSTUV = wasm_i32x4_add(vinit_bias, wasm_i32x4_extend_high_i16x8(vaccOPQRSTUV)); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__wasmsimd_c32() 192 v128_t vaccOPQRSTUV = wasm_i16x8_add(vxi0xOPQRSTUV, vxi1xOPQRSTUV); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__wasmsimd_c32() local 202 vaccOPQRSTUV = wasm_i16x8_add(vaccOPQRSTUV, vxi2xOPQRSTUV); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__wasmsimd_c32() [all …]
|
D | 7x-minmax-fp32-wasmsimd-c32.c | 80 v128_t vaccOPQRSTUV = wasm_i16x8_add(vxi0xOPQRSTUV, vxi1xOPQRSTUV); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7x__wasmsimd_c32() local 90 vaccOPQRSTUV = wasm_i16x8_add(vaccOPQRSTUV, vxi2xOPQRSTUV); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7x__wasmsimd_c32() 99 vaccOPQRSTUV = wasm_i16x8_add(vaccOPQRSTUV, vxi3xOPQRSTUV); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7x__wasmsimd_c32() 108 vaccOPQRSTUV = wasm_i16x8_add(vaccOPQRSTUV, vxi4xOPQRSTUV); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7x__wasmsimd_c32() 117 vaccOPQRSTUV = wasm_i16x8_add(vaccOPQRSTUV, vxi5xOPQRSTUV); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7x__wasmsimd_c32() 124 vaccOPQRSTUV = wasm_i16x8_add(vaccOPQRSTUV, vxi6xOPQRSTUV); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7x__wasmsimd_c32() 132 v128_t vaccOPQR = wasm_i32x4_add(vinit_bias, wasm_i32x4_extend_low_i16x8(vaccOPQRSTUV)); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7x__wasmsimd_c32() 133 v128_t vaccSTUV = wasm_i32x4_add(vinit_bias, wasm_i32x4_extend_high_i16x8(vaccOPQRSTUV)); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7x__wasmsimd_c32()
|
D | 7x-minmax-fp32-neonv8-c32.c | 158 int16x8_t vaccOPQRSTUV = vqmovn_high_s32(vqmovn_s32(vaccOPQR), vaccSTUV); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7x__neonv8_c32() local 163 int16x8_t vaccOPQRSTUV = vcombine_s16(vqmovn_s32(vaccOPQR), vqmovn_s32(vaccSTUV)); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7x__neonv8_c32() local 169 vaccOPQRSTUV = vqaddq_s16(vaccOPQRSTUV, voutput_zero_point); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7x__neonv8_c32() 173 int8x16_t voutGHIJKLMNOPQRSTUV = vqmovn_high_s16(vqmovn_s16(vaccGHIJKLMN), vaccOPQRSTUV); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7x__neonv8_c32() 176 … int8x16_t voutGHIJKLMNOPQRSTUV = vcombine_s8(vqmovn_s16(vaccGHIJKLMN), vqmovn_s16(vaccOPQRSTUV)); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7x__neonv8_c32()
|
/external/XNNPACK/src/qu8-gavgpool/gen/ |
D | 7p7x-minmax-fp32-wasmsimd-c32.c | 61 v128_t vaccOPQRSTUV = wasm_i16x8_add(vxi0xOPQRSTUV, vxi1xOPQRSTUV); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__wasmsimd_c32() local 71 vaccOPQRSTUV = wasm_i16x8_add(vaccOPQRSTUV, vxi2xOPQRSTUV); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__wasmsimd_c32() 80 vaccOPQRSTUV = wasm_i16x8_add(vaccOPQRSTUV, vxi3xOPQRSTUV); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__wasmsimd_c32() 89 vaccOPQRSTUV = wasm_i16x8_add(vaccOPQRSTUV, vxi4xOPQRSTUV); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__wasmsimd_c32() 98 vaccOPQRSTUV = wasm_i16x8_add(vaccOPQRSTUV, vxi5xOPQRSTUV); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__wasmsimd_c32() 105 vaccOPQRSTUV = wasm_i16x8_add(vaccOPQRSTUV, vxi6xOPQRSTUV); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__wasmsimd_c32() 113 const v128_t vaccOPQR = wasm_i32x4_add(vinit_bias, wasm_u32x4_extend_low_u16x8(vaccOPQRSTUV)); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__wasmsimd_c32() 114 const v128_t vaccSTUV = wasm_i32x4_add(vinit_bias, wasm_u32x4_extend_high_u16x8(vaccOPQRSTUV)); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__wasmsimd_c32() 192 v128_t vaccOPQRSTUV = wasm_i16x8_add(vxi0xOPQRSTUV, vxi1xOPQRSTUV); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__wasmsimd_c32() local 202 vaccOPQRSTUV = wasm_i16x8_add(vaccOPQRSTUV, vxi2xOPQRSTUV); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__wasmsimd_c32() [all …]
|
D | 7x-minmax-fp32-wasmsimd-c32.c | 80 v128_t vaccOPQRSTUV = wasm_i16x8_add(vxi0xOPQRSTUV, vxi1xOPQRSTUV); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7x__wasmsimd_c32() local 90 vaccOPQRSTUV = wasm_i16x8_add(vaccOPQRSTUV, vxi2xOPQRSTUV); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7x__wasmsimd_c32() 99 vaccOPQRSTUV = wasm_i16x8_add(vaccOPQRSTUV, vxi3xOPQRSTUV); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7x__wasmsimd_c32() 108 vaccOPQRSTUV = wasm_i16x8_add(vaccOPQRSTUV, vxi4xOPQRSTUV); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7x__wasmsimd_c32() 117 vaccOPQRSTUV = wasm_i16x8_add(vaccOPQRSTUV, vxi5xOPQRSTUV); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7x__wasmsimd_c32() 124 vaccOPQRSTUV = wasm_i16x8_add(vaccOPQRSTUV, vxi6xOPQRSTUV); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7x__wasmsimd_c32() 132 v128_t vaccOPQR = wasm_i32x4_add(vinit_bias, wasm_u32x4_extend_low_u16x8(vaccOPQRSTUV)); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7x__wasmsimd_c32() 133 v128_t vaccSTUV = wasm_i32x4_add(vinit_bias, wasm_u32x4_extend_high_u16x8(vaccOPQRSTUV)); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7x__wasmsimd_c32()
|
/external/XNNPACK/src/qs8-dwconv/gen/ |
D | up32x25-minmax-fp32-avx2-mul32.c | 167 __m256i vaccOPQRSTUV = _mm256_loadu_si256((const __m256i*) ((const int32_t*) w + 24)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32() local 183 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi0xOPQRSTUV, vk0xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32() 198 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi1xOPQRSTUV, vk1xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32() 213 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi2xOPQRSTUV, vk2xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32() 228 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi3xOPQRSTUV, vk3xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32() 243 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi4xOPQRSTUV, vk4xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32() 258 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi5xOPQRSTUV, vk5xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32() 273 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi6xOPQRSTUV, vk6xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32() 288 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi7xOPQRSTUV, vk7xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32() 303 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi8xOPQRSTUV, vk8xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32() [all …]
|
D | up32x25-minmax-fp32-avx2-mul16-vpmovsx.c | 167 …__m256i vaccOPQRSTUV = _mm256_loadu_si256((const __m256i*) ((uintptr_t) w + 24 * sizeof(int32_t))); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpmovsx() local 183 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_cvtepi16_epi32(vprod0xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpmovsx() 198 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_cvtepi16_epi32(vprod1xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpmovsx() 213 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_cvtepi16_epi32(vprod2xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpmovsx() 228 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_cvtepi16_epi32(vprod3xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpmovsx() 243 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_cvtepi16_epi32(vprod4xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpmovsx() 258 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_cvtepi16_epi32(vprod5xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpmovsx() 273 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_cvtepi16_epi32(vprod6xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpmovsx() 288 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_cvtepi16_epi32(vprod7xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpmovsx() 303 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_cvtepi16_epi32(vprod8xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpmovsx() [all …]
|
D | up32x9-minmax-fp32-avx2-mul32.c | 87 __m256i vaccOPQRSTUV = _mm256_loadu_si256((const __m256i*) ((const int32_t*) w + 24)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32() local 103 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi0xOPQRSTUV, vk0xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32() 118 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi1xOPQRSTUV, vk1xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32() 133 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi2xOPQRSTUV, vk2xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32() 148 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi3xOPQRSTUV, vk3xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32() 163 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi4xOPQRSTUV, vk4xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32() 178 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi5xOPQRSTUV, vk5xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32() 193 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi6xOPQRSTUV, vk6xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32() 208 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi7xOPQRSTUV, vk7xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32() 223 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi8xOPQRSTUV, vk8xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32() [all …]
|
D | up32x9-minmax-fp32-avx2-mul16-vpmovsx.c | 87 …__m256i vaccOPQRSTUV = _mm256_loadu_si256((const __m256i*) ((uintptr_t) w + 24 * sizeof(int32_t))); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpmovsx() local 103 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_cvtepi16_epi32(vprod0xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpmovsx() 118 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_cvtepi16_epi32(vprod1xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpmovsx() 133 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_cvtepi16_epi32(vprod2xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpmovsx() 148 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_cvtepi16_epi32(vprod3xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpmovsx() 163 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_cvtepi16_epi32(vprod4xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpmovsx() 178 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_cvtepi16_epi32(vprod5xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpmovsx() 193 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_cvtepi16_epi32(vprod6xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpmovsx() 208 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_cvtepi16_epi32(vprod7xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpmovsx() 223 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_cvtepi16_epi32(vprod8xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpmovsx() [all …]
|
/external/XNNPACK/src/qc8-dwconv/gen/ |
D | up32x25-minmax-fp32-avx2-mul16-vpmovsx.c | 167 …__m256i vaccOPQRSTUV = _mm256_loadu_si256((const __m256i*) ((uintptr_t) w + 24 * sizeof(int32_t))); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpmovsx() local 183 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_cvtepi16_epi32(vprod0xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpmovsx() 198 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_cvtepi16_epi32(vprod1xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpmovsx() 213 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_cvtepi16_epi32(vprod2xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpmovsx() 228 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_cvtepi16_epi32(vprod3xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpmovsx() 243 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_cvtepi16_epi32(vprod4xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpmovsx() 258 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_cvtepi16_epi32(vprod5xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpmovsx() 273 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_cvtepi16_epi32(vprod6xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpmovsx() 288 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_cvtepi16_epi32(vprod7xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpmovsx() 303 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_cvtepi16_epi32(vprod8xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpmovsx() [all …]
|
D | up32x25-minmax-fp32-avx2-mul32.c | 167 __m256i vaccOPQRSTUV = _mm256_loadu_si256((const __m256i*) ((const int32_t*) w + 24)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32() local 183 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi0xOPQRSTUV, vk0xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32() 198 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi1xOPQRSTUV, vk1xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32() 213 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi2xOPQRSTUV, vk2xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32() 228 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi3xOPQRSTUV, vk3xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32() 243 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi4xOPQRSTUV, vk4xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32() 258 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi5xOPQRSTUV, vk5xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32() 273 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi6xOPQRSTUV, vk6xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32() 288 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi7xOPQRSTUV, vk7xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32() 303 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi8xOPQRSTUV, vk8xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32() [all …]
|
D | up32x9-minmax-fp32-avx2-mul32.c | 87 __m256i vaccOPQRSTUV = _mm256_loadu_si256((const __m256i*) ((const int32_t*) w + 24)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32() local 103 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi0xOPQRSTUV, vk0xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32() 118 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi1xOPQRSTUV, vk1xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32() 133 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi2xOPQRSTUV, vk2xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32() 148 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi3xOPQRSTUV, vk3xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32() 163 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi4xOPQRSTUV, vk4xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32() 178 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi5xOPQRSTUV, vk5xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32() 193 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi6xOPQRSTUV, vk6xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32() 208 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi7xOPQRSTUV, vk7xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32() 223 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi8xOPQRSTUV, vk8xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32() [all …]
|
D | up32x9-minmax-fp32-avx2-mul16-vpmovsx.c | 87 …__m256i vaccOPQRSTUV = _mm256_loadu_si256((const __m256i*) ((uintptr_t) w + 24 * sizeof(int32_t))); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpmovsx() local 103 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_cvtepi16_epi32(vprod0xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpmovsx() 118 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_cvtepi16_epi32(vprod1xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpmovsx() 133 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_cvtepi16_epi32(vprod2xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpmovsx() 148 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_cvtepi16_epi32(vprod3xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpmovsx() 163 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_cvtepi16_epi32(vprod4xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpmovsx() 178 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_cvtepi16_epi32(vprod5xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpmovsx() 193 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_cvtepi16_epi32(vprod6xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpmovsx() 208 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_cvtepi16_epi32(vprod7xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpmovsx() 223 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_cvtepi16_epi32(vprod8xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpmovsx() [all …]
|
/external/XNNPACK/src/qu8-dwconv/gen/ |
D | up32x25-minmax-fp32-avx2-mul32.c | 168 __m256i vaccOPQRSTUV = _mm256_loadu_si256((const __m256i*) ((const int32_t*) w + 24)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32() local 184 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi0xOPQRSTUV, vk0xOPQRSTUV)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32() 199 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi1xOPQRSTUV, vk1xOPQRSTUV)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32() 214 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi2xOPQRSTUV, vk2xOPQRSTUV)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32() 229 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi3xOPQRSTUV, vk3xOPQRSTUV)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32() 244 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi4xOPQRSTUV, vk4xOPQRSTUV)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32() 259 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi5xOPQRSTUV, vk5xOPQRSTUV)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32() 274 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi6xOPQRSTUV, vk6xOPQRSTUV)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32() 289 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi7xOPQRSTUV, vk7xOPQRSTUV)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32() 304 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi8xOPQRSTUV, vk8xOPQRSTUV)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32() [all …]
|
D | up32x9-minmax-fp32-avx2-mul32.c | 88 __m256i vaccOPQRSTUV = _mm256_loadu_si256((const __m256i*) ((const int32_t*) w + 24)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32() local 104 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi0xOPQRSTUV, vk0xOPQRSTUV)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32() 119 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi1xOPQRSTUV, vk1xOPQRSTUV)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32() 134 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi2xOPQRSTUV, vk2xOPQRSTUV)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32() 149 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi3xOPQRSTUV, vk3xOPQRSTUV)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32() 164 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi4xOPQRSTUV, vk4xOPQRSTUV)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32() 179 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi5xOPQRSTUV, vk5xOPQRSTUV)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32() 194 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi6xOPQRSTUV, vk6xOPQRSTUV)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32() 209 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi7xOPQRSTUV, vk7xOPQRSTUV)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32() 224 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi8xOPQRSTUV, vk8xOPQRSTUV)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32() [all …]
|
/external/XNNPACK/src/qs8-vadd/gen/ |
D | minmax-avx2-mul32-ld64-x32.c | 48 __m256i vaccOPQRSTUV = _mm256_add_epi32(vbias, _mm256_mullo_epi32(vaOPQRSTUV, va_multiplier)); in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x32() local 53 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vbOPQRSTUV, vb_multiplier)); in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x32() 58 vaccOPQRSTUV = _mm256_sra_epi32(vaccOPQRSTUV, vshift); in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x32() 61 …__m256i voutGHIJOPQRKLMNSTUV = _mm256_adds_epi16(_mm256_packs_epi32(vaccGHIJKLMN, vaccOPQRSTUV), v… in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x32()
|
/external/XNNPACK/src/qs8-vaddc/gen/ |
D | minmax-avx2-mul32-ld64-x32.c | 44 __m256i vaccOPQRSTUV = _mm256_add_epi32(vbias, _mm256_mullo_epi32(vaOPQRSTUV, va_multiplier)); in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x32() local 49 vaccOPQRSTUV = _mm256_sra_epi32(vaccOPQRSTUV, vshift); in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x32() 52 …__m256i voutGHIJOPQRKLMNSTUV = _mm256_adds_epi16(_mm256_packs_epi32(vaccGHIJKLMN, vaccOPQRSTUV), v… in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x32()
|
/external/XNNPACK/src/f32-qs8-vcvt/gen/ |
D | vcvt-neonv8-x32.c | 65 int16x8_t vaccOPQRSTUV = vcombine_s16(vqmovn_s32(vaccOPQR), vqmovn_s32(vaccSTUV)); in xnn_f32_qs8_vcvt_ukernel__neonv8_x32() local 70 vaccOPQRSTUV = vqaddq_s16(vaccOPQRSTUV, voutput_zero_point); in xnn_f32_qs8_vcvt_ukernel__neonv8_x32() 73 int8x16_t vyGHIJKLMNOPQRSTUV = vcombine_s8(vqmovn_s16(vaccGHIJKLMN), vqmovn_s16(vaccOPQRSTUV)); in xnn_f32_qs8_vcvt_ukernel__neonv8_x32()
|
D | vcvt-wasmsimd-cvt-x32.c | 75 v128_t vaccOPQRSTUV = wasm_i16x8_narrow_i32x4(vaccOPQR, vaccSTUV); in xnn_f32_qs8_vcvt_ukernel__wasmsimd_cvt_x32() local 80 vaccOPQRSTUV = wasm_i16x8_add_sat(vaccOPQRSTUV, voutput_zero_point); in xnn_f32_qs8_vcvt_ukernel__wasmsimd_cvt_x32() 83 v128_t vyGHIJKLMNOPQRSTUV = wasm_i8x16_narrow_i16x8(vaccGHIJKLMN, vaccOPQRSTUV); in xnn_f32_qs8_vcvt_ukernel__wasmsimd_cvt_x32()
|
/external/XNNPACK/src/f32-qu8-vcvt/gen/ |
D | vcvt-neonv8-x32.c | 65 int16x8_t vaccOPQRSTUV = vcombine_s16(vqmovn_s32(vaccOPQR), vqmovn_s32(vaccSTUV)); in xnn_f32_qu8_vcvt_ukernel__neonv8_x32() local 70 vaccOPQRSTUV = vqaddq_s16(vaccOPQRSTUV, voutput_zero_point); in xnn_f32_qu8_vcvt_ukernel__neonv8_x32() 73 … uint8x16_t vyGHIJKLMNOPQRSTUV = vcombine_u8(vqmovun_s16(vaccGHIJKLMN), vqmovun_s16(vaccOPQRSTUV)); in xnn_f32_qu8_vcvt_ukernel__neonv8_x32()
|
D | vcvt-wasmsimd-cvt-x32.c | 75 v128_t vaccOPQRSTUV = wasm_i16x8_narrow_i32x4(vaccOPQR, vaccSTUV); in xnn_f32_qu8_vcvt_ukernel__wasmsimd_cvt_x32() local 80 vaccOPQRSTUV = wasm_i16x8_add_sat(vaccOPQRSTUV, voutput_zero_point); in xnn_f32_qu8_vcvt_ukernel__wasmsimd_cvt_x32() 83 v128_t vyGHIJKLMNOPQRSTUV = wasm_u8x16_narrow_i16x8(vaccGHIJKLMN, vaccOPQRSTUV); in xnn_f32_qu8_vcvt_ukernel__wasmsimd_cvt_x32()
|