/external/XNNPACK/src/qs8-vaddc/gen/ |
D | minmax-sse2-mul16-ld64-x16.c | 65 __m128i vout89ABCDEF = _mm_adds_epi16(_mm_packs_epi32(vacc89AB, vaccCDEF), voutput_zero_point); in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x16() local 68 vout89ABCDEF = _mm_max_epi16(vout89ABCDEF, voutput_min); in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x16() 71 vout89ABCDEF = _mm_min_epi16(vout89ABCDEF, voutput_max); in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x16() 73 __m128i vout0123456789ABCDEF = _mm_packs_epi16(vout01234567, vout89ABCDEF); in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x16()
|
D | minmax-sse2-mul16-ld64-x24.c | 75 __m128i vout89ABCDEF = _mm_adds_epi16(_mm_packs_epi32(vacc89AB, vaccCDEF), voutput_zero_point); in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x24() local 79 vout89ABCDEF = _mm_max_epi16(vout89ABCDEF, voutput_min); in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x24() 83 vout89ABCDEF = _mm_min_epi16(vout89ABCDEF, voutput_max); in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x24() 86 __m128i vout0123456789ABCDEF = _mm_packs_epi16(vout01234567, vout89ABCDEF); in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x24()
|
D | minmax-sse2-mul16-ld64-x32.c | 85 __m128i vout89ABCDEF = _mm_adds_epi16(_mm_packs_epi32(vacc89AB, vaccCDEF), voutput_zero_point); in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x32() local 90 vout89ABCDEF = _mm_max_epi16(vout89ABCDEF, voutput_min); in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x32() 95 vout89ABCDEF = _mm_min_epi16(vout89ABCDEF, voutput_max); in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x32() 99 __m128i vout0123456789ABCDEF = _mm_packs_epi16(vout01234567, vout89ABCDEF); in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x32()
|
D | minmax-wasmsimd-x16.c | 49 …v128_t vout89ABCDEF = wasm_i16x8_add_sat(wasm_i16x8_narrow_i32x4(vacc89AB, vaccCDEF), voutput_zero… in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x16() local 51 v128_t vout0123456789ABCDEF = wasm_i8x16_narrow_i16x8(vout01234567, vout89ABCDEF); in xnn_qs8_vaddc_minmax_ukernel__wasmsimd_x16()
|
D | minmax-xop-mul32-ld32-x16.c | 59 …const __m128i vout89ABCDEF = _mm_adds_epi16(_mm_packs_epi32(vacc89AB, vaccCDEF), voutput_zero_poin… in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x16() local 61 __m128i vout0123456789ABCDEF = _mm_packs_epi16(vout01234567, vout89ABCDEF); in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x16()
|
D | minmax-avx-mul32-ld32-x16.c | 54 …const __m128i vout89ABCDEF = _mm_adds_epi16(_mm_packs_epi32(vacc89AB, vaccCDEF), voutput_zero_poin… in xnn_qs8_vaddc_minmax_ukernel__avx_mul32_ld32_x16() local 56 __m128i vout0123456789ABCDEF = _mm_packs_epi16(vout01234567, vout89ABCDEF); in xnn_qs8_vaddc_minmax_ukernel__avx_mul32_ld32_x16()
|
D | minmax-sse41-mul32-ld32-x16.c | 54 …const __m128i vout89ABCDEF = _mm_adds_epi16(_mm_packs_epi32(vacc89AB, vaccCDEF), voutput_zero_poin… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x16() local 56 __m128i vout0123456789ABCDEF = _mm_packs_epi16(vout01234567, vout89ABCDEF); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x16()
|
/external/XNNPACK/src/f16-spmm/gen/ |
D | 24x1-minmax-neonfp16arith.c | 63 float16x8_t vout89ABCDEF = vminq_f16(vacc89ABCDEF, vmax); in xnn_f16_spmm_minmax_ukernel_24x1__neonfp16arith() local 66 vout89ABCDEF = vmaxq_f16(vout89ABCDEF, vmin); in xnn_f16_spmm_minmax_ukernel_24x1__neonfp16arith() 69 vst1q_f16(o + 8, vout89ABCDEF); in xnn_f16_spmm_minmax_ukernel_24x1__neonfp16arith() 100 float16x8_t vout89ABCDEF = vminq_f16(vacc89ABCDEF, vmax); in xnn_f16_spmm_minmax_ukernel_24x1__neonfp16arith() local 102 vout89ABCDEF = vmaxq_f16(vout89ABCDEF, vmin); in xnn_f16_spmm_minmax_ukernel_24x1__neonfp16arith() 104 vst1q_f16(o + 8, vout89ABCDEF); in xnn_f16_spmm_minmax_ukernel_24x1__neonfp16arith()
|
D | 32x1-minmax-neonfp16arith.c | 66 float16x8_t vout89ABCDEF = vminq_f16(vacc89ABCDEF, vmax); in xnn_f16_spmm_minmax_ukernel_32x1__neonfp16arith() local 70 vout89ABCDEF = vmaxq_f16(vout89ABCDEF, vmin); in xnn_f16_spmm_minmax_ukernel_32x1__neonfp16arith() 74 vst1q_f16(o + 8, vout89ABCDEF); in xnn_f16_spmm_minmax_ukernel_32x1__neonfp16arith() 106 float16x8_t vout89ABCDEF = vminq_f16(vacc89ABCDEF, vmax); in xnn_f16_spmm_minmax_ukernel_32x1__neonfp16arith() local 108 vout89ABCDEF = vmaxq_f16(vout89ABCDEF, vmin); in xnn_f16_spmm_minmax_ukernel_32x1__neonfp16arith() 110 vst1q_f16(o + 8, vout89ABCDEF); in xnn_f16_spmm_minmax_ukernel_32x1__neonfp16arith()
|
D | 24x1-minmax-neonfp16arith-x2.c | 93 float16x8_t vout89ABCDEF = vminq_f16(vacc89ABCDEF, vmax); in xnn_f16_spmm_minmax_ukernel_24x1__neonfp16arith_x2() local 96 vout89ABCDEF = vmaxq_f16(vout89ABCDEF, vmin); in xnn_f16_spmm_minmax_ukernel_24x1__neonfp16arith_x2() 99 vst1q_f16(o + 8, vout89ABCDEF); in xnn_f16_spmm_minmax_ukernel_24x1__neonfp16arith_x2() 130 float16x8_t vout89ABCDEF = vminq_f16(vacc89ABCDEF, vmax); in xnn_f16_spmm_minmax_ukernel_24x1__neonfp16arith_x2() local 132 vout89ABCDEF = vmaxq_f16(vout89ABCDEF, vmin); in xnn_f16_spmm_minmax_ukernel_24x1__neonfp16arith_x2() 134 vst1q_f16(o + 8, vout89ABCDEF); in xnn_f16_spmm_minmax_ukernel_24x1__neonfp16arith_x2()
|
D | 32x1-minmax-neonfp16arith-x2.c | 103 float16x8_t vout89ABCDEF = vminq_f16(vacc89ABCDEF, vmax); in xnn_f16_spmm_minmax_ukernel_32x1__neonfp16arith_x2() local 107 vout89ABCDEF = vmaxq_f16(vout89ABCDEF, vmin); in xnn_f16_spmm_minmax_ukernel_32x1__neonfp16arith_x2() 111 vst1q_f16(o + 8, vout89ABCDEF); in xnn_f16_spmm_minmax_ukernel_32x1__neonfp16arith_x2() 143 float16x8_t vout89ABCDEF = vminq_f16(vacc89ABCDEF, vmax); in xnn_f16_spmm_minmax_ukernel_32x1__neonfp16arith_x2() local 145 vout89ABCDEF = vmaxq_f16(vout89ABCDEF, vmin); in xnn_f16_spmm_minmax_ukernel_32x1__neonfp16arith_x2() 147 vst1q_f16(o + 8, vout89ABCDEF); in xnn_f16_spmm_minmax_ukernel_32x1__neonfp16arith_x2()
|
D | 16x1-minmax-neonfp16arith.c | 60 float16x8_t vout89ABCDEF = vminq_f16(vacc89ABCDEF, vmax); in xnn_f16_spmm_minmax_ukernel_16x1__neonfp16arith() local 62 vout89ABCDEF = vmaxq_f16(vout89ABCDEF, vmin); in xnn_f16_spmm_minmax_ukernel_16x1__neonfp16arith() 64 vst1q_f16(o + 8, vout89ABCDEF); in xnn_f16_spmm_minmax_ukernel_16x1__neonfp16arith()
|
D | 16x1-minmax-neonfp16arith-x2.c | 83 float16x8_t vout89ABCDEF = vminq_f16(vacc89ABCDEF, vmax); in xnn_f16_spmm_minmax_ukernel_16x1__neonfp16arith_x2() local 85 vout89ABCDEF = vmaxq_f16(vout89ABCDEF, vmin); in xnn_f16_spmm_minmax_ukernel_16x1__neonfp16arith_x2() 87 vst1q_f16(o + 8, vout89ABCDEF); in xnn_f16_spmm_minmax_ukernel_16x1__neonfp16arith_x2()
|
/external/XNNPACK/src/qs8-vmulc/gen/ |
D | minmax-fp32-sse2-mul16-ld64-x16.c | 72 __m128i vout89ABCDEF = _mm_adds_epi16(_mm_packs_epi32(vacc89AB, vaccCDEF), voutput_zero_point); in xnn_qs8_vmulc_minmax_fp32_ukernel__sse2_mul16_ld64_x16() local 75 vout89ABCDEF = _mm_max_epi16(vout89ABCDEF, voutput_min); in xnn_qs8_vmulc_minmax_fp32_ukernel__sse2_mul16_ld64_x16() 78 vout89ABCDEF = _mm_min_epi16(vout89ABCDEF, voutput_max); in xnn_qs8_vmulc_minmax_fp32_ukernel__sse2_mul16_ld64_x16() 80 __m128i vout0123456789ABCDEF = _mm_packs_epi16(vout01234567, vout89ABCDEF); in xnn_qs8_vmulc_minmax_fp32_ukernel__sse2_mul16_ld64_x16()
|
/external/XNNPACK/src/qs8-vmul/gen/ |
D | minmax-fp32-sse2-mul16-ld64-x16.c | 77 __m128i vout89ABCDEF = _mm_adds_epi16(_mm_packs_epi32(vacc89AB, vaccCDEF), voutput_zero_point); in xnn_qs8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x16() local 80 vout89ABCDEF = _mm_max_epi16(vout89ABCDEF, voutput_min); in xnn_qs8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x16() 83 vout89ABCDEF = _mm_min_epi16(vout89ABCDEF, voutput_max); in xnn_qs8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x16() 85 __m128i vout0123456789ABCDEF = _mm_packs_epi16(vout01234567, vout89ABCDEF); in xnn_qs8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x16()
|
/external/XNNPACK/src/qs8-vadd/gen/ |
D | minmax-sse2-mul16-ld64-x16.c | 83 __m128i vout89ABCDEF = _mm_adds_epi16(_mm_packs_epi32(vacc89AB, vaccCDEF), voutput_zero_point); in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x16() local 86 vout89ABCDEF = _mm_max_epi16(vout89ABCDEF, voutput_min); in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x16() 89 vout89ABCDEF = _mm_min_epi16(vout89ABCDEF, voutput_max); in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x16() 91 __m128i vout0123456789ABCDEF = _mm_packs_epi16(vout01234567, vout89ABCDEF); in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x16()
|
D | minmax-sse2-mul16-ld64-x24.c | 101 __m128i vout89ABCDEF = _mm_adds_epi16(_mm_packs_epi32(vacc89AB, vaccCDEF), voutput_zero_point); in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x24() local 105 vout89ABCDEF = _mm_max_epi16(vout89ABCDEF, voutput_min); in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x24() 109 vout89ABCDEF = _mm_min_epi16(vout89ABCDEF, voutput_max); in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x24() 112 __m128i vout0123456789ABCDEF = _mm_packs_epi16(vout01234567, vout89ABCDEF); in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x24()
|
D | minmax-sse2-mul16-ld64-x32.c | 119 __m128i vout89ABCDEF = _mm_adds_epi16(_mm_packs_epi32(vacc89AB, vaccCDEF), voutput_zero_point); in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32() local 124 vout89ABCDEF = _mm_max_epi16(vout89ABCDEF, voutput_min); in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32() 129 vout89ABCDEF = _mm_min_epi16(vout89ABCDEF, voutput_max); in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32() 133 __m128i vout0123456789ABCDEF = _mm_packs_epi16(vout01234567, vout89ABCDEF); in xnn_qs8_vadd_minmax_ukernel__sse2_mul16_ld64_x32()
|
/external/XNNPACK/src/f16-gavgpool/gen/ |
D | 7x-minmax-f16c-c16.c | 102 __m256 vout89ABCDEF = _mm256_max_ps(_mm256_cvtph_ps(vacc89ABCDEF), vmin); in xnn_f16_gavgpool_minmax_ukernel_7x__f16c_c16() local 105 vout89ABCDEF = _mm256_min_ps(vout89ABCDEF, vmax); in xnn_f16_gavgpool_minmax_ukernel_7x__f16c_c16() 108 _mm_storeu_si128((__m128i*) (o + 8), _mm256_cvtps_ph(vout89ABCDEF, _MM_FROUND_NO_EXC)); in xnn_f16_gavgpool_minmax_ukernel_7x__f16c_c16()
|
D | 7x-minmax-f16c-c24.c | 116 __m256 vout89ABCDEF = _mm256_max_ps(_mm256_cvtph_ps(vacc89ABCDEF), vmin); in xnn_f16_gavgpool_minmax_ukernel_7x__f16c_c24() local 120 vout89ABCDEF = _mm256_min_ps(vout89ABCDEF, vmax); in xnn_f16_gavgpool_minmax_ukernel_7x__f16c_c24() 124 _mm_storeu_si128((__m128i*) (o + 8), _mm256_cvtps_ph(vout89ABCDEF, _MM_FROUND_NO_EXC)); in xnn_f16_gavgpool_minmax_ukernel_7x__f16c_c24()
|
D | 7x-minmax-f16c-c32.c | 130 __m256 vout89ABCDEF = _mm256_max_ps(_mm256_cvtph_ps(vacc89ABCDEF), vmin); in xnn_f16_gavgpool_minmax_ukernel_7x__f16c_c32() local 135 vout89ABCDEF = _mm256_min_ps(vout89ABCDEF, vmax); in xnn_f16_gavgpool_minmax_ukernel_7x__f16c_c32() 140 _mm_storeu_si128((__m128i*) (o + 8), _mm256_cvtps_ph(vout89ABCDEF, _MM_FROUND_NO_EXC)); in xnn_f16_gavgpool_minmax_ukernel_7x__f16c_c32()
|
/external/XNNPACK/src/qs8-gavgpool/gen/ |
D | 7x-minmax-fp32-sse2-c16.c | 153 __m128i vout89ABCDEF = _mm_adds_epi16(_mm_packs_epi32(vacc89AB, vaccCDEF), voutput_zero_point); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7x__sse2_c16() local 156 vout89ABCDEF = _mm_max_epi16(vout89ABCDEF, voutput_min); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7x__sse2_c16() 158 __m128i vout0123456789ABCDEF = _mm_packs_epi16(vout01234567, vout89ABCDEF); in xnn_qs8_gavgpool_minmax_fp32_ukernel_7x__sse2_c16()
|
/external/XNNPACK/src/qu8-vaddc/gen/ |
D | minmax-wasmsimd-x16.c | 49 …v128_t vout89ABCDEF = wasm_i16x8_add_sat(wasm_i16x8_narrow_i32x4(vacc89AB, vaccCDEF), voutput_zero… in xnn_qu8_vaddc_minmax_ukernel__wasmsimd_x16() local 51 v128_t vout0123456789ABCDEF = wasm_u8x16_narrow_i16x8(vout01234567, vout89ABCDEF); in xnn_qu8_vaddc_minmax_ukernel__wasmsimd_x16()
|
D | minmax-xop-mul32-ld32-x16.c | 59 …const __m128i vout89ABCDEF = _mm_adds_epi16(_mm_packs_epi32(vacc89AB, vaccCDEF), voutput_zero_poin… in xnn_qu8_vaddc_minmax_ukernel__xop_mul32_ld32_x16() local 61 __m128i vout0123456789ABCDEF = _mm_packus_epi16(vout01234567, vout89ABCDEF); in xnn_qu8_vaddc_minmax_ukernel__xop_mul32_ld32_x16()
|
D | minmax-sse41-mul32-ld32-x16.c | 54 …const __m128i vout89ABCDEF = _mm_adds_epi16(_mm_packs_epi32(vacc89AB, vaccCDEF), voutput_zero_poin… in xnn_qu8_vaddc_minmax_ukernel__sse41_mul32_ld32_x16() local 56 __m128i vout0123456789ABCDEF = _mm_packus_epi16(vout01234567, vout89ABCDEF); in xnn_qu8_vaddc_minmax_ukernel__sse41_mul32_ld32_x16()
|