Home
last modified time | relevance | path

Searched refs:vaccOPQRSTUV (Results 1 – 25 of 93) sorted by relevance

1234

/external/XNNPACK/src/f16-gavgpool/gen/
D7p7x-minmax-neonfp16arith-c32.c59 float16x8_t vaccOPQRSTUV = vaddq_f16(vi0xOPQRSTUV, vi1xOPQRSTUV); in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c32() local
68 vaccOPQRSTUV = vaddq_f16(vaccOPQRSTUV, vi2xOPQRSTUV); in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c32()
76 vaccOPQRSTUV = vaddq_f16(vaccOPQRSTUV, vi3xOPQRSTUV); in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c32()
84 vaccOPQRSTUV = vaddq_f16(vaccOPQRSTUV, vi4xOPQRSTUV); in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c32()
92 vaccOPQRSTUV = vaddq_f16(vaccOPQRSTUV, vi5xOPQRSTUV); in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c32()
96 vaccOPQRSTUV = vaddq_f16(vaccOPQRSTUV, vi6xOPQRSTUV); in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c32()
101 vst1q_f16(b, vaccOPQRSTUV); b += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c32()
141 float16x8_t vaccOPQRSTUV = vld1q_f16(b + 24); in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c32() local
155 vaccOPQRSTUV = vaddq_f16(vaccOPQRSTUV, vi0xOPQRSTUV); in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c32()
163 vaccOPQRSTUV = vaddq_f16(vaccOPQRSTUV, vi1xOPQRSTUV); in xnn_f16_gavgpool_minmax_ukernel_7p7x__neonfp16arith_c32()
[all …]
D7x-minmax-neonfp16arith-c32.c76 float16x8_t vaccOPQRSTUV = vaddq_f16(vi0xOPQRSTUV, vi1xOPQRSTUV); in xnn_f16_gavgpool_minmax_ukernel_7x__neonfp16arith_c32() local
85 vaccOPQRSTUV = vaddq_f16(vaccOPQRSTUV, vi2xOPQRSTUV); in xnn_f16_gavgpool_minmax_ukernel_7x__neonfp16arith_c32()
93 vaccOPQRSTUV = vaddq_f16(vaccOPQRSTUV, vi3xOPQRSTUV); in xnn_f16_gavgpool_minmax_ukernel_7x__neonfp16arith_c32()
101 vaccOPQRSTUV = vaddq_f16(vaccOPQRSTUV, vi4xOPQRSTUV); in xnn_f16_gavgpool_minmax_ukernel_7x__neonfp16arith_c32()
109 vaccOPQRSTUV = vaddq_f16(vaccOPQRSTUV, vi5xOPQRSTUV); in xnn_f16_gavgpool_minmax_ukernel_7x__neonfp16arith_c32()
113 vaccOPQRSTUV = vaddq_f16(vaccOPQRSTUV, vi6xOPQRSTUV); in xnn_f16_gavgpool_minmax_ukernel_7x__neonfp16arith_c32()
118 vaccOPQRSTUV = vmulq_f16(vaccOPQRSTUV, vscale); in xnn_f16_gavgpool_minmax_ukernel_7x__neonfp16arith_c32()
123 vaccOPQRSTUV = vmaxq_f16(vaccOPQRSTUV, vmin); in xnn_f16_gavgpool_minmax_ukernel_7x__neonfp16arith_c32()
128 vaccOPQRSTUV = vminq_f16(vaccOPQRSTUV, vmax); in xnn_f16_gavgpool_minmax_ukernel_7x__neonfp16arith_c32()
133 vst1q_f16(output, vaccOPQRSTUV); output = (__fp16*) output + 8; in xnn_f16_gavgpool_minmax_ukernel_7x__neonfp16arith_c32()
D7p7x-minmax-f16c-c32.c60 …__m128i vaccOPQRSTUV = _mm256_cvtps_ph(_mm256_add_ps(vi0xOPQRSTUV, vi1xOPQRSTUV), _MM_FROUND_NO_EX… in xnn_f16_gavgpool_minmax_ukernel_7p7x__f16c_c32() local
69vaccOPQRSTUV = _mm256_cvtps_ph(_mm256_add_ps(_mm256_cvtph_ps(vaccOPQRSTUV), vi2xOPQRSTUV), _MM_FRO… in xnn_f16_gavgpool_minmax_ukernel_7p7x__f16c_c32()
77vaccOPQRSTUV = _mm256_cvtps_ph(_mm256_add_ps(_mm256_cvtph_ps(vaccOPQRSTUV), vi3xOPQRSTUV), _MM_FRO… in xnn_f16_gavgpool_minmax_ukernel_7p7x__f16c_c32()
85vaccOPQRSTUV = _mm256_cvtps_ph(_mm256_add_ps(_mm256_cvtph_ps(vaccOPQRSTUV), vi4xOPQRSTUV), _MM_FRO… in xnn_f16_gavgpool_minmax_ukernel_7p7x__f16c_c32()
93vaccOPQRSTUV = _mm256_cvtps_ph(_mm256_add_ps(_mm256_cvtph_ps(vaccOPQRSTUV), vi5xOPQRSTUV), _MM_FRO… in xnn_f16_gavgpool_minmax_ukernel_7p7x__f16c_c32()
97vaccOPQRSTUV = _mm256_cvtps_ph(_mm256_add_ps(_mm256_cvtph_ps(vaccOPQRSTUV), vi6xOPQRSTUV), _MM_FRO… in xnn_f16_gavgpool_minmax_ukernel_7p7x__f16c_c32()
102 _mm_store_si128((__m128i*) b, vaccOPQRSTUV); b += 8; in xnn_f16_gavgpool_minmax_ukernel_7p7x__f16c_c32()
142 __m128i vaccOPQRSTUV = _mm_loadu_si128((const __m128i*) (b + 24)); in xnn_f16_gavgpool_minmax_ukernel_7p7x__f16c_c32() local
156vaccOPQRSTUV = _mm256_cvtps_ph(_mm256_add_ps(_mm256_cvtph_ps(vaccOPQRSTUV), vi0xOPQRSTUV), _MM_FRO… in xnn_f16_gavgpool_minmax_ukernel_7p7x__f16c_c32()
164vaccOPQRSTUV = _mm256_cvtps_ph(_mm256_add_ps(_mm256_cvtph_ps(vaccOPQRSTUV), vi1xOPQRSTUV), _MM_FRO… in xnn_f16_gavgpool_minmax_ukernel_7p7x__f16c_c32()
[all …]
D7x-minmax-f16c-c32.c80 …__m128i vaccOPQRSTUV = _mm256_cvtps_ph(_mm256_add_ps(vi0xOPQRSTUV, vi1xOPQRSTUV), _MM_FROUND_NO_EX… in xnn_f16_gavgpool_minmax_ukernel_7x__f16c_c32() local
91vaccOPQRSTUV = _mm256_cvtps_ph(_mm256_add_ps(_mm256_cvtph_ps(vaccOPQRSTUV), vi2xOPQRSTUV), _MM_FRO… in xnn_f16_gavgpool_minmax_ukernel_7x__f16c_c32()
100vaccOPQRSTUV = _mm256_cvtps_ph(_mm256_add_ps(_mm256_cvtph_ps(vaccOPQRSTUV), vi3xOPQRSTUV), _MM_FRO… in xnn_f16_gavgpool_minmax_ukernel_7x__f16c_c32()
109vaccOPQRSTUV = _mm256_cvtps_ph(_mm256_add_ps(_mm256_cvtph_ps(vaccOPQRSTUV), vi4xOPQRSTUV), _MM_FRO… in xnn_f16_gavgpool_minmax_ukernel_7x__f16c_c32()
118vaccOPQRSTUV = _mm256_cvtps_ph(_mm256_add_ps(_mm256_cvtph_ps(vaccOPQRSTUV), vi5xOPQRSTUV), _MM_FRO… in xnn_f16_gavgpool_minmax_ukernel_7x__f16c_c32()
122vaccOPQRSTUV = _mm256_cvtps_ph(_mm256_add_ps(_mm256_cvtph_ps(vaccOPQRSTUV), vi6xOPQRSTUV), _MM_FRO… in xnn_f16_gavgpool_minmax_ukernel_7x__f16c_c32()
127vaccOPQRSTUV = _mm256_cvtps_ph(_mm256_mul_ps(_mm256_cvtph_ps(vaccOPQRSTUV), vscale), _MM_FROUND_NO… in xnn_f16_gavgpool_minmax_ukernel_7x__f16c_c32()
132 __m256 voutOPQRSTUV = _mm256_max_ps(_mm256_cvtph_ps(vaccOPQRSTUV), vmin); in xnn_f16_gavgpool_minmax_ukernel_7x__f16c_c32()
/external/XNNPACK/src/qs8-gavgpool/gen/
D7p7x-minmax-fp32-wasmsimd-c32.c61 v128_t vaccOPQRSTUV = wasm_i16x8_add(vxi0xOPQRSTUV, vxi1xOPQRSTUV); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__wasmsimd_c32() local
71 vaccOPQRSTUV = wasm_i16x8_add(vaccOPQRSTUV, vxi2xOPQRSTUV); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__wasmsimd_c32()
80 vaccOPQRSTUV = wasm_i16x8_add(vaccOPQRSTUV, vxi3xOPQRSTUV); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__wasmsimd_c32()
89 vaccOPQRSTUV = wasm_i16x8_add(vaccOPQRSTUV, vxi4xOPQRSTUV); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__wasmsimd_c32()
98 vaccOPQRSTUV = wasm_i16x8_add(vaccOPQRSTUV, vxi5xOPQRSTUV); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__wasmsimd_c32()
105 vaccOPQRSTUV = wasm_i16x8_add(vaccOPQRSTUV, vxi6xOPQRSTUV); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__wasmsimd_c32()
113 const v128_t vaccOPQR = wasm_i32x4_add(vinit_bias, wasm_i32x4_extend_low_i16x8(vaccOPQRSTUV)); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__wasmsimd_c32()
114 const v128_t vaccSTUV = wasm_i32x4_add(vinit_bias, wasm_i32x4_extend_high_i16x8(vaccOPQRSTUV)); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__wasmsimd_c32()
192 v128_t vaccOPQRSTUV = wasm_i16x8_add(vxi0xOPQRSTUV, vxi1xOPQRSTUV); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__wasmsimd_c32() local
202 vaccOPQRSTUV = wasm_i16x8_add(vaccOPQRSTUV, vxi2xOPQRSTUV); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7p7x__wasmsimd_c32()
[all …]
D7x-minmax-fp32-wasmsimd-c32.c80 v128_t vaccOPQRSTUV = wasm_i16x8_add(vxi0xOPQRSTUV, vxi1xOPQRSTUV); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7x__wasmsimd_c32() local
90 vaccOPQRSTUV = wasm_i16x8_add(vaccOPQRSTUV, vxi2xOPQRSTUV); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7x__wasmsimd_c32()
99 vaccOPQRSTUV = wasm_i16x8_add(vaccOPQRSTUV, vxi3xOPQRSTUV); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7x__wasmsimd_c32()
108 vaccOPQRSTUV = wasm_i16x8_add(vaccOPQRSTUV, vxi4xOPQRSTUV); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7x__wasmsimd_c32()
117 vaccOPQRSTUV = wasm_i16x8_add(vaccOPQRSTUV, vxi5xOPQRSTUV); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7x__wasmsimd_c32()
124 vaccOPQRSTUV = wasm_i16x8_add(vaccOPQRSTUV, vxi6xOPQRSTUV); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7x__wasmsimd_c32()
132 v128_t vaccOPQR = wasm_i32x4_add(vinit_bias, wasm_i32x4_extend_low_i16x8(vaccOPQRSTUV)); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7x__wasmsimd_c32()
133 v128_t vaccSTUV = wasm_i32x4_add(vinit_bias, wasm_i32x4_extend_high_i16x8(vaccOPQRSTUV)); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7x__wasmsimd_c32()
D7x-minmax-fp32-neonv8-c32.c158 int16x8_t vaccOPQRSTUV = vqmovn_high_s32(vqmovn_s32(vaccOPQR), vaccSTUV); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7x__neonv8_c32() local
163 int16x8_t vaccOPQRSTUV = vcombine_s16(vqmovn_s32(vaccOPQR), vqmovn_s32(vaccSTUV)); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7x__neonv8_c32() local
169 vaccOPQRSTUV = vqaddq_s16(vaccOPQRSTUV, voutput_zero_point); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7x__neonv8_c32()
173 int8x16_t voutGHIJKLMNOPQRSTUV = vqmovn_high_s16(vqmovn_s16(vaccGHIJKLMN), vaccOPQRSTUV); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7x__neonv8_c32()
176 … int8x16_t voutGHIJKLMNOPQRSTUV = vcombine_s8(vqmovn_s16(vaccGHIJKLMN), vqmovn_s16(vaccOPQRSTUV)); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7x__neonv8_c32()
/external/XNNPACK/src/qu8-gavgpool/gen/
D7p7x-minmax-fp32-wasmsimd-c32.c61 v128_t vaccOPQRSTUV = wasm_i16x8_add(vxi0xOPQRSTUV, vxi1xOPQRSTUV); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__wasmsimd_c32() local
71 vaccOPQRSTUV = wasm_i16x8_add(vaccOPQRSTUV, vxi2xOPQRSTUV); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__wasmsimd_c32()
80 vaccOPQRSTUV = wasm_i16x8_add(vaccOPQRSTUV, vxi3xOPQRSTUV); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__wasmsimd_c32()
89 vaccOPQRSTUV = wasm_i16x8_add(vaccOPQRSTUV, vxi4xOPQRSTUV); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__wasmsimd_c32()
98 vaccOPQRSTUV = wasm_i16x8_add(vaccOPQRSTUV, vxi5xOPQRSTUV); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__wasmsimd_c32()
105 vaccOPQRSTUV = wasm_i16x8_add(vaccOPQRSTUV, vxi6xOPQRSTUV); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__wasmsimd_c32()
113 const v128_t vaccOPQR = wasm_i32x4_add(vinit_bias, wasm_u32x4_extend_low_u16x8(vaccOPQRSTUV)); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__wasmsimd_c32()
114 const v128_t vaccSTUV = wasm_i32x4_add(vinit_bias, wasm_u32x4_extend_high_u16x8(vaccOPQRSTUV)); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__wasmsimd_c32()
192 v128_t vaccOPQRSTUV = wasm_i16x8_add(vxi0xOPQRSTUV, vxi1xOPQRSTUV); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__wasmsimd_c32() local
202 vaccOPQRSTUV = wasm_i16x8_add(vaccOPQRSTUV, vxi2xOPQRSTUV); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7p7x__wasmsimd_c32()
[all …]
D7x-minmax-fp32-wasmsimd-c32.c80 v128_t vaccOPQRSTUV = wasm_i16x8_add(vxi0xOPQRSTUV, vxi1xOPQRSTUV); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7x__wasmsimd_c32() local
90 vaccOPQRSTUV = wasm_i16x8_add(vaccOPQRSTUV, vxi2xOPQRSTUV); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7x__wasmsimd_c32()
99 vaccOPQRSTUV = wasm_i16x8_add(vaccOPQRSTUV, vxi3xOPQRSTUV); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7x__wasmsimd_c32()
108 vaccOPQRSTUV = wasm_i16x8_add(vaccOPQRSTUV, vxi4xOPQRSTUV); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7x__wasmsimd_c32()
117 vaccOPQRSTUV = wasm_i16x8_add(vaccOPQRSTUV, vxi5xOPQRSTUV); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7x__wasmsimd_c32()
124 vaccOPQRSTUV = wasm_i16x8_add(vaccOPQRSTUV, vxi6xOPQRSTUV); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7x__wasmsimd_c32()
132 v128_t vaccOPQR = wasm_i32x4_add(vinit_bias, wasm_u32x4_extend_low_u16x8(vaccOPQRSTUV)); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7x__wasmsimd_c32()
133 v128_t vaccSTUV = wasm_i32x4_add(vinit_bias, wasm_u32x4_extend_high_u16x8(vaccOPQRSTUV)); in xnn_qu8_gavgpool_minmax_fp32_ukernel_7x__wasmsimd_c32()
/external/XNNPACK/src/qs8-dwconv/gen/
Dup32x25-minmax-fp32-avx2-mul32.c167 __m256i vaccOPQRSTUV = _mm256_loadu_si256((const __m256i*) ((const int32_t*) w + 24)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32() local
183 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi0xOPQRSTUV, vk0xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32()
198 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi1xOPQRSTUV, vk1xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32()
213 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi2xOPQRSTUV, vk2xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32()
228 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi3xOPQRSTUV, vk3xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32()
243 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi4xOPQRSTUV, vk4xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32()
258 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi5xOPQRSTUV, vk5xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32()
273 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi6xOPQRSTUV, vk6xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32()
288 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi7xOPQRSTUV, vk7xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32()
303 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi8xOPQRSTUV, vk8xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32()
[all …]
Dup32x25-minmax-fp32-avx2-mul16-vpmovsx.c167 …__m256i vaccOPQRSTUV = _mm256_loadu_si256((const __m256i*) ((uintptr_t) w + 24 * sizeof(int32_t))); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpmovsx() local
183 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_cvtepi16_epi32(vprod0xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpmovsx()
198 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_cvtepi16_epi32(vprod1xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpmovsx()
213 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_cvtepi16_epi32(vprod2xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpmovsx()
228 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_cvtepi16_epi32(vprod3xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpmovsx()
243 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_cvtepi16_epi32(vprod4xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpmovsx()
258 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_cvtepi16_epi32(vprod5xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpmovsx()
273 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_cvtepi16_epi32(vprod6xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpmovsx()
288 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_cvtepi16_epi32(vprod7xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpmovsx()
303 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_cvtepi16_epi32(vprod8xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpmovsx()
[all …]
Dup32x9-minmax-fp32-avx2-mul32.c87 __m256i vaccOPQRSTUV = _mm256_loadu_si256((const __m256i*) ((const int32_t*) w + 24)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32() local
103 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi0xOPQRSTUV, vk0xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32()
118 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi1xOPQRSTUV, vk1xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32()
133 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi2xOPQRSTUV, vk2xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32()
148 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi3xOPQRSTUV, vk3xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32()
163 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi4xOPQRSTUV, vk4xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32()
178 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi5xOPQRSTUV, vk5xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32()
193 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi6xOPQRSTUV, vk6xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32()
208 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi7xOPQRSTUV, vk7xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32()
223 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi8xOPQRSTUV, vk8xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32()
[all …]
Dup32x9-minmax-fp32-avx2-mul16-vpmovsx.c87 …__m256i vaccOPQRSTUV = _mm256_loadu_si256((const __m256i*) ((uintptr_t) w + 24 * sizeof(int32_t))); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpmovsx() local
103 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_cvtepi16_epi32(vprod0xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpmovsx()
118 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_cvtepi16_epi32(vprod1xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpmovsx()
133 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_cvtepi16_epi32(vprod2xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpmovsx()
148 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_cvtepi16_epi32(vprod3xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpmovsx()
163 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_cvtepi16_epi32(vprod4xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpmovsx()
178 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_cvtepi16_epi32(vprod5xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpmovsx()
193 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_cvtepi16_epi32(vprod6xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpmovsx()
208 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_cvtepi16_epi32(vprod7xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpmovsx()
223 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_cvtepi16_epi32(vprod8xOPQRSTUV)); in xnn_qs8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpmovsx()
[all …]
/external/XNNPACK/src/qc8-dwconv/gen/
Dup32x25-minmax-fp32-avx2-mul16-vpmovsx.c167 …__m256i vaccOPQRSTUV = _mm256_loadu_si256((const __m256i*) ((uintptr_t) w + 24 * sizeof(int32_t))); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpmovsx() local
183 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_cvtepi16_epi32(vprod0xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpmovsx()
198 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_cvtepi16_epi32(vprod1xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpmovsx()
213 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_cvtepi16_epi32(vprod2xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpmovsx()
228 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_cvtepi16_epi32(vprod3xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpmovsx()
243 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_cvtepi16_epi32(vprod4xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpmovsx()
258 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_cvtepi16_epi32(vprod5xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpmovsx()
273 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_cvtepi16_epi32(vprod6xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpmovsx()
288 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_cvtepi16_epi32(vprod7xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpmovsx()
303 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_cvtepi16_epi32(vprod8xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul16_vpmovsx()
[all …]
Dup32x25-minmax-fp32-avx2-mul32.c167 __m256i vaccOPQRSTUV = _mm256_loadu_si256((const __m256i*) ((const int32_t*) w + 24)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32() local
183 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi0xOPQRSTUV, vk0xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32()
198 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi1xOPQRSTUV, vk1xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32()
213 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi2xOPQRSTUV, vk2xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32()
228 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi3xOPQRSTUV, vk3xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32()
243 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi4xOPQRSTUV, vk4xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32()
258 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi5xOPQRSTUV, vk5xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32()
273 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi6xOPQRSTUV, vk6xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32()
288 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi7xOPQRSTUV, vk7xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32()
303 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi8xOPQRSTUV, vk8xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32()
[all …]
Dup32x9-minmax-fp32-avx2-mul32.c87 __m256i vaccOPQRSTUV = _mm256_loadu_si256((const __m256i*) ((const int32_t*) w + 24)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32() local
103 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi0xOPQRSTUV, vk0xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32()
118 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi1xOPQRSTUV, vk1xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32()
133 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi2xOPQRSTUV, vk2xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32()
148 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi3xOPQRSTUV, vk3xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32()
163 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi4xOPQRSTUV, vk4xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32()
178 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi5xOPQRSTUV, vk5xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32()
193 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi6xOPQRSTUV, vk6xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32()
208 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi7xOPQRSTUV, vk7xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32()
223 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi8xOPQRSTUV, vk8xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32()
[all …]
Dup32x9-minmax-fp32-avx2-mul16-vpmovsx.c87 …__m256i vaccOPQRSTUV = _mm256_loadu_si256((const __m256i*) ((uintptr_t) w + 24 * sizeof(int32_t))); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpmovsx() local
103 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_cvtepi16_epi32(vprod0xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpmovsx()
118 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_cvtepi16_epi32(vprod1xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpmovsx()
133 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_cvtepi16_epi32(vprod2xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpmovsx()
148 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_cvtepi16_epi32(vprod3xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpmovsx()
163 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_cvtepi16_epi32(vprod4xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpmovsx()
178 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_cvtepi16_epi32(vprod5xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpmovsx()
193 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_cvtepi16_epi32(vprod6xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpmovsx()
208 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_cvtepi16_epi32(vprod7xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpmovsx()
223 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_cvtepi16_epi32(vprod8xOPQRSTUV)); in xnn_qc8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul16_vpmovsx()
[all …]
/external/XNNPACK/src/qu8-dwconv/gen/
Dup32x25-minmax-fp32-avx2-mul32.c168 __m256i vaccOPQRSTUV = _mm256_loadu_si256((const __m256i*) ((const int32_t*) w + 24)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32() local
184 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi0xOPQRSTUV, vk0xOPQRSTUV)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32()
199 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi1xOPQRSTUV, vk1xOPQRSTUV)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32()
214 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi2xOPQRSTUV, vk2xOPQRSTUV)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32()
229 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi3xOPQRSTUV, vk3xOPQRSTUV)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32()
244 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi4xOPQRSTUV, vk4xOPQRSTUV)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32()
259 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi5xOPQRSTUV, vk5xOPQRSTUV)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32()
274 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi6xOPQRSTUV, vk6xOPQRSTUV)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32()
289 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi7xOPQRSTUV, vk7xOPQRSTUV)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32()
304 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi8xOPQRSTUV, vk8xOPQRSTUV)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up32x25__avx2_mul32()
[all …]
Dup32x9-minmax-fp32-avx2-mul32.c88 __m256i vaccOPQRSTUV = _mm256_loadu_si256((const __m256i*) ((const int32_t*) w + 24)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32() local
104 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi0xOPQRSTUV, vk0xOPQRSTUV)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32()
119 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi1xOPQRSTUV, vk1xOPQRSTUV)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32()
134 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi2xOPQRSTUV, vk2xOPQRSTUV)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32()
149 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi3xOPQRSTUV, vk3xOPQRSTUV)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32()
164 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi4xOPQRSTUV, vk4xOPQRSTUV)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32()
179 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi5xOPQRSTUV, vk5xOPQRSTUV)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32()
194 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi6xOPQRSTUV, vk6xOPQRSTUV)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32()
209 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi7xOPQRSTUV, vk7xOPQRSTUV)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32()
224 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vi8xOPQRSTUV, vk8xOPQRSTUV)); in xnn_qu8_dwconv_minmax_fp32_ukernel_up32x9__avx2_mul32()
[all …]
/external/XNNPACK/src/qs8-vadd/gen/
Dminmax-avx2-mul32-ld64-x32.c48 __m256i vaccOPQRSTUV = _mm256_add_epi32(vbias, _mm256_mullo_epi32(vaOPQRSTUV, va_multiplier)); in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x32() local
53 vaccOPQRSTUV = _mm256_add_epi32(vaccOPQRSTUV, _mm256_mullo_epi32(vbOPQRSTUV, vb_multiplier)); in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x32()
58 vaccOPQRSTUV = _mm256_sra_epi32(vaccOPQRSTUV, vshift); in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x32()
61 …__m256i voutGHIJOPQRKLMNSTUV = _mm256_adds_epi16(_mm256_packs_epi32(vaccGHIJKLMN, vaccOPQRSTUV), v… in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x32()
/external/XNNPACK/src/qs8-vaddc/gen/
Dminmax-avx2-mul32-ld64-x32.c44 __m256i vaccOPQRSTUV = _mm256_add_epi32(vbias, _mm256_mullo_epi32(vaOPQRSTUV, va_multiplier)); in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x32() local
49 vaccOPQRSTUV = _mm256_sra_epi32(vaccOPQRSTUV, vshift); in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x32()
52 …__m256i voutGHIJOPQRKLMNSTUV = _mm256_adds_epi16(_mm256_packs_epi32(vaccGHIJKLMN, vaccOPQRSTUV), v… in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x32()
/external/XNNPACK/src/f32-qs8-vcvt/gen/
Dvcvt-neonv8-x32.c65 int16x8_t vaccOPQRSTUV = vcombine_s16(vqmovn_s32(vaccOPQR), vqmovn_s32(vaccSTUV)); in xnn_f32_qs8_vcvt_ukernel__neonv8_x32() local
70 vaccOPQRSTUV = vqaddq_s16(vaccOPQRSTUV, voutput_zero_point); in xnn_f32_qs8_vcvt_ukernel__neonv8_x32()
73 int8x16_t vyGHIJKLMNOPQRSTUV = vcombine_s8(vqmovn_s16(vaccGHIJKLMN), vqmovn_s16(vaccOPQRSTUV)); in xnn_f32_qs8_vcvt_ukernel__neonv8_x32()
Dvcvt-wasmsimd-cvt-x32.c75 v128_t vaccOPQRSTUV = wasm_i16x8_narrow_i32x4(vaccOPQR, vaccSTUV); in xnn_f32_qs8_vcvt_ukernel__wasmsimd_cvt_x32() local
80 vaccOPQRSTUV = wasm_i16x8_add_sat(vaccOPQRSTUV, voutput_zero_point); in xnn_f32_qs8_vcvt_ukernel__wasmsimd_cvt_x32()
83 v128_t vyGHIJKLMNOPQRSTUV = wasm_i8x16_narrow_i16x8(vaccGHIJKLMN, vaccOPQRSTUV); in xnn_f32_qs8_vcvt_ukernel__wasmsimd_cvt_x32()
/external/XNNPACK/src/f32-qu8-vcvt/gen/
Dvcvt-neonv8-x32.c65 int16x8_t vaccOPQRSTUV = vcombine_s16(vqmovn_s32(vaccOPQR), vqmovn_s32(vaccSTUV)); in xnn_f32_qu8_vcvt_ukernel__neonv8_x32() local
70 vaccOPQRSTUV = vqaddq_s16(vaccOPQRSTUV, voutput_zero_point); in xnn_f32_qu8_vcvt_ukernel__neonv8_x32()
73 … uint8x16_t vyGHIJKLMNOPQRSTUV = vcombine_u8(vqmovun_s16(vaccGHIJKLMN), vqmovun_s16(vaccOPQRSTUV)); in xnn_f32_qu8_vcvt_ukernel__neonv8_x32()
Dvcvt-wasmsimd-cvt-x32.c75 v128_t vaccOPQRSTUV = wasm_i16x8_narrow_i32x4(vaccOPQR, vaccSTUV); in xnn_f32_qu8_vcvt_ukernel__wasmsimd_cvt_x32() local
80 vaccOPQRSTUV = wasm_i16x8_add_sat(vaccOPQRSTUV, voutput_zero_point); in xnn_f32_qu8_vcvt_ukernel__wasmsimd_cvt_x32()
83 v128_t vyGHIJKLMNOPQRSTUV = wasm_u8x16_narrow_i16x8(vaccGHIJKLMN, vaccOPQRSTUV); in xnn_f32_qu8_vcvt_ukernel__wasmsimd_cvt_x32()

1234