/external/XNNPACK/src/f32-qs8-vcvt/gen/ |
D | vcvt-sse2-x32.c | 31 const __m128 voutput_max_less_zero_point = _mm_load_ps(params->sse2.output_max_less_zero_point); in xnn_f32_qs8_vcvt_ukernel__sse2_x32() local 55 vx0123 = _mm_min_ps(vx0123, voutput_max_less_zero_point); in xnn_f32_qs8_vcvt_ukernel__sse2_x32() 56 vx4567 = _mm_min_ps(vx4567, voutput_max_less_zero_point); in xnn_f32_qs8_vcvt_ukernel__sse2_x32() 57 vx89AB = _mm_min_ps(vx89AB, voutput_max_less_zero_point); in xnn_f32_qs8_vcvt_ukernel__sse2_x32() 58 vxCDEF = _mm_min_ps(vxCDEF, voutput_max_less_zero_point); in xnn_f32_qs8_vcvt_ukernel__sse2_x32() 59 vxGHIJ = _mm_min_ps(vxGHIJ, voutput_max_less_zero_point); in xnn_f32_qs8_vcvt_ukernel__sse2_x32() 60 vxKLMN = _mm_min_ps(vxKLMN, voutput_max_less_zero_point); in xnn_f32_qs8_vcvt_ukernel__sse2_x32() 61 vxOPQR = _mm_min_ps(vxOPQR, voutput_max_less_zero_point); in xnn_f32_qs8_vcvt_ukernel__sse2_x32() 62 vxSTUV = _mm_min_ps(vxSTUV, voutput_max_less_zero_point); in xnn_f32_qs8_vcvt_ukernel__sse2_x32() 104 vx_lo = _mm_min_ps(vx_lo, voutput_max_less_zero_point); in xnn_f32_qs8_vcvt_ukernel__sse2_x32() [all …]
|
D | vcvt-sse41-x32.c | 31 const __m128 voutput_max_less_zero_point = _mm_load_ps(params->sse4.output_max_less_zero_point); in xnn_f32_qs8_vcvt_ukernel__sse41_x32() local 55 vx0123 = _mm_min_ps(vx0123, voutput_max_less_zero_point); in xnn_f32_qs8_vcvt_ukernel__sse41_x32() 56 vx4567 = _mm_min_ps(vx4567, voutput_max_less_zero_point); in xnn_f32_qs8_vcvt_ukernel__sse41_x32() 57 vx89AB = _mm_min_ps(vx89AB, voutput_max_less_zero_point); in xnn_f32_qs8_vcvt_ukernel__sse41_x32() 58 vxCDEF = _mm_min_ps(vxCDEF, voutput_max_less_zero_point); in xnn_f32_qs8_vcvt_ukernel__sse41_x32() 59 vxGHIJ = _mm_min_ps(vxGHIJ, voutput_max_less_zero_point); in xnn_f32_qs8_vcvt_ukernel__sse41_x32() 60 vxKLMN = _mm_min_ps(vxKLMN, voutput_max_less_zero_point); in xnn_f32_qs8_vcvt_ukernel__sse41_x32() 61 vxOPQR = _mm_min_ps(vxOPQR, voutput_max_less_zero_point); in xnn_f32_qs8_vcvt_ukernel__sse41_x32() 62 vxSTUV = _mm_min_ps(vxSTUV, voutput_max_less_zero_point); in xnn_f32_qs8_vcvt_ukernel__sse41_x32() 102 vx_lo = _mm_min_ps(vx_lo, voutput_max_less_zero_point); in xnn_f32_qs8_vcvt_ukernel__sse41_x32() [all …]
|
D | vcvt-sse2-x24.c | 31 const __m128 voutput_max_less_zero_point = _mm_load_ps(params->sse2.output_max_less_zero_point); in xnn_f32_qs8_vcvt_ukernel__sse2_x24() local 51 vx0123 = _mm_min_ps(vx0123, voutput_max_less_zero_point); in xnn_f32_qs8_vcvt_ukernel__sse2_x24() 52 vx4567 = _mm_min_ps(vx4567, voutput_max_less_zero_point); in xnn_f32_qs8_vcvt_ukernel__sse2_x24() 53 vx89AB = _mm_min_ps(vx89AB, voutput_max_less_zero_point); in xnn_f32_qs8_vcvt_ukernel__sse2_x24() 54 vxCDEF = _mm_min_ps(vxCDEF, voutput_max_less_zero_point); in xnn_f32_qs8_vcvt_ukernel__sse2_x24() 55 vxGHIJ = _mm_min_ps(vxGHIJ, voutput_max_less_zero_point); in xnn_f32_qs8_vcvt_ukernel__sse2_x24() 56 vxKLMN = _mm_min_ps(vxKLMN, voutput_max_less_zero_point); in xnn_f32_qs8_vcvt_ukernel__sse2_x24() 93 vx_lo = _mm_min_ps(vx_lo, voutput_max_less_zero_point); in xnn_f32_qs8_vcvt_ukernel__sse2_x24() 94 vx_hi = _mm_min_ps(vx_hi, voutput_max_less_zero_point); in xnn_f32_qs8_vcvt_ukernel__sse2_x24() 115 vx_lo = _mm_min_ps(vx_lo, voutput_max_less_zero_point); in xnn_f32_qs8_vcvt_ukernel__sse2_x24() [all …]
|
D | vcvt-sse41-x24.c | 31 const __m128 voutput_max_less_zero_point = _mm_load_ps(params->sse4.output_max_less_zero_point); in xnn_f32_qs8_vcvt_ukernel__sse41_x24() local 51 vx0123 = _mm_min_ps(vx0123, voutput_max_less_zero_point); in xnn_f32_qs8_vcvt_ukernel__sse41_x24() 52 vx4567 = _mm_min_ps(vx4567, voutput_max_less_zero_point); in xnn_f32_qs8_vcvt_ukernel__sse41_x24() 53 vx89AB = _mm_min_ps(vx89AB, voutput_max_less_zero_point); in xnn_f32_qs8_vcvt_ukernel__sse41_x24() 54 vxCDEF = _mm_min_ps(vxCDEF, voutput_max_less_zero_point); in xnn_f32_qs8_vcvt_ukernel__sse41_x24() 55 vxGHIJ = _mm_min_ps(vxGHIJ, voutput_max_less_zero_point); in xnn_f32_qs8_vcvt_ukernel__sse41_x24() 56 vxKLMN = _mm_min_ps(vxKLMN, voutput_max_less_zero_point); in xnn_f32_qs8_vcvt_ukernel__sse41_x24() 92 vx_lo = _mm_min_ps(vx_lo, voutput_max_less_zero_point); in xnn_f32_qs8_vcvt_ukernel__sse41_x24() 93 vx_hi = _mm_min_ps(vx_hi, voutput_max_less_zero_point); in xnn_f32_qs8_vcvt_ukernel__sse41_x24() 114 vx_lo = _mm_min_ps(vx_lo, voutput_max_less_zero_point); in xnn_f32_qs8_vcvt_ukernel__sse41_x24() [all …]
|
D | vcvt-avx2-x64.c | 31 …const __m256 voutput_max_less_zero_point = _mm256_load_ps(params->avx2.output_max_less_zero_point); in xnn_f32_qs8_vcvt_ukernel__avx2_x64() local 56 vx01 = _mm256_min_ps(vx01, voutput_max_less_zero_point); in xnn_f32_qs8_vcvt_ukernel__avx2_x64() 57 vx23 = _mm256_min_ps(vx23, voutput_max_less_zero_point); in xnn_f32_qs8_vcvt_ukernel__avx2_x64() 58 vx45 = _mm256_min_ps(vx45, voutput_max_less_zero_point); in xnn_f32_qs8_vcvt_ukernel__avx2_x64() 59 vx67 = _mm256_min_ps(vx67, voutput_max_less_zero_point); in xnn_f32_qs8_vcvt_ukernel__avx2_x64() 60 vx89 = _mm256_min_ps(vx89, voutput_max_less_zero_point); in xnn_f32_qs8_vcvt_ukernel__avx2_x64() 61 vxAB = _mm256_min_ps(vxAB, voutput_max_less_zero_point); in xnn_f32_qs8_vcvt_ukernel__avx2_x64() 62 vxCD = _mm256_min_ps(vxCD, voutput_max_less_zero_point); in xnn_f32_qs8_vcvt_ukernel__avx2_x64() 63 vxEF = _mm256_min_ps(vxEF, voutput_max_less_zero_point); in xnn_f32_qs8_vcvt_ukernel__avx2_x64() 100 vx = _mm256_min_ps(vx, voutput_max_less_zero_point); in xnn_f32_qs8_vcvt_ukernel__avx2_x64() [all …]
|
D | vcvt-avx512skx-x128.c | 31 …const __m512 voutput_max_less_zero_point = _mm512_load_ps(params->avx512.output_max_less_zero_poin… in xnn_f32_qs8_vcvt_ukernel__avx512skx_x128() local 55 vx0123 = _mm512_min_ps(vx0123, voutput_max_less_zero_point); in xnn_f32_qs8_vcvt_ukernel__avx512skx_x128() 56 vx4567 = _mm512_min_ps(vx4567, voutput_max_less_zero_point); in xnn_f32_qs8_vcvt_ukernel__avx512skx_x128() 57 vx89AB = _mm512_min_ps(vx89AB, voutput_max_less_zero_point); in xnn_f32_qs8_vcvt_ukernel__avx512skx_x128() 58 vxCDEF = _mm512_min_ps(vxCDEF, voutput_max_less_zero_point); in xnn_f32_qs8_vcvt_ukernel__avx512skx_x128() 59 vxGHIJ = _mm512_min_ps(vxGHIJ, voutput_max_less_zero_point); in xnn_f32_qs8_vcvt_ukernel__avx512skx_x128() 60 vxKLMN = _mm512_min_ps(vxKLMN, voutput_max_less_zero_point); in xnn_f32_qs8_vcvt_ukernel__avx512skx_x128() 61 vxOPQR = _mm512_min_ps(vxOPQR, voutput_max_less_zero_point); in xnn_f32_qs8_vcvt_ukernel__avx512skx_x128() 62 vxSTUV = _mm512_min_ps(vxSTUV, voutput_max_less_zero_point); in xnn_f32_qs8_vcvt_ukernel__avx512skx_x128() 99 vx0123 = _mm512_min_ps(vx0123, voutput_max_less_zero_point); in xnn_f32_qs8_vcvt_ukernel__avx512skx_x128() [all …]
|
/external/XNNPACK/src/f32-qu8-vcvt/gen/ |
D | vcvt-sse2-x32.c | 31 const __m128 voutput_max_less_zero_point = _mm_load_ps(params->sse2.output_max_less_zero_point); in xnn_f32_qu8_vcvt_ukernel__sse2_x32() local 55 vx0123 = _mm_min_ps(vx0123, voutput_max_less_zero_point); in xnn_f32_qu8_vcvt_ukernel__sse2_x32() 56 vx4567 = _mm_min_ps(vx4567, voutput_max_less_zero_point); in xnn_f32_qu8_vcvt_ukernel__sse2_x32() 57 vx89AB = _mm_min_ps(vx89AB, voutput_max_less_zero_point); in xnn_f32_qu8_vcvt_ukernel__sse2_x32() 58 vxCDEF = _mm_min_ps(vxCDEF, voutput_max_less_zero_point); in xnn_f32_qu8_vcvt_ukernel__sse2_x32() 59 vxGHIJ = _mm_min_ps(vxGHIJ, voutput_max_less_zero_point); in xnn_f32_qu8_vcvt_ukernel__sse2_x32() 60 vxKLMN = _mm_min_ps(vxKLMN, voutput_max_less_zero_point); in xnn_f32_qu8_vcvt_ukernel__sse2_x32() 61 vxOPQR = _mm_min_ps(vxOPQR, voutput_max_less_zero_point); in xnn_f32_qu8_vcvt_ukernel__sse2_x32() 62 vxSTUV = _mm_min_ps(vxSTUV, voutput_max_less_zero_point); in xnn_f32_qu8_vcvt_ukernel__sse2_x32() 102 vx_lo = _mm_min_ps(vx_lo, voutput_max_less_zero_point); in xnn_f32_qu8_vcvt_ukernel__sse2_x32() [all …]
|
D | vcvt-sse2-x24.c | 31 const __m128 voutput_max_less_zero_point = _mm_load_ps(params->sse2.output_max_less_zero_point); in xnn_f32_qu8_vcvt_ukernel__sse2_x24() local 51 vx0123 = _mm_min_ps(vx0123, voutput_max_less_zero_point); in xnn_f32_qu8_vcvt_ukernel__sse2_x24() 52 vx4567 = _mm_min_ps(vx4567, voutput_max_less_zero_point); in xnn_f32_qu8_vcvt_ukernel__sse2_x24() 53 vx89AB = _mm_min_ps(vx89AB, voutput_max_less_zero_point); in xnn_f32_qu8_vcvt_ukernel__sse2_x24() 54 vxCDEF = _mm_min_ps(vxCDEF, voutput_max_less_zero_point); in xnn_f32_qu8_vcvt_ukernel__sse2_x24() 55 vxGHIJ = _mm_min_ps(vxGHIJ, voutput_max_less_zero_point); in xnn_f32_qu8_vcvt_ukernel__sse2_x24() 56 vxKLMN = _mm_min_ps(vxKLMN, voutput_max_less_zero_point); in xnn_f32_qu8_vcvt_ukernel__sse2_x24() 92 vx_lo = _mm_min_ps(vx_lo, voutput_max_less_zero_point); in xnn_f32_qu8_vcvt_ukernel__sse2_x24() 93 vx_hi = _mm_min_ps(vx_hi, voutput_max_less_zero_point); in xnn_f32_qu8_vcvt_ukernel__sse2_x24() 114 vx_lo = _mm_min_ps(vx_lo, voutput_max_less_zero_point); in xnn_f32_qu8_vcvt_ukernel__sse2_x24() [all …]
|
D | vcvt-avx512skx-x128.c | 31 …const __m512 voutput_max_less_zero_point = _mm512_load_ps(params->avx512.output_max_less_zero_poin… in xnn_f32_qu8_vcvt_ukernel__avx512skx_x128() local 55 vx0123 = _mm512_min_ps(vx0123, voutput_max_less_zero_point); in xnn_f32_qu8_vcvt_ukernel__avx512skx_x128() 56 vx4567 = _mm512_min_ps(vx4567, voutput_max_less_zero_point); in xnn_f32_qu8_vcvt_ukernel__avx512skx_x128() 57 vx89AB = _mm512_min_ps(vx89AB, voutput_max_less_zero_point); in xnn_f32_qu8_vcvt_ukernel__avx512skx_x128() 58 vxCDEF = _mm512_min_ps(vxCDEF, voutput_max_less_zero_point); in xnn_f32_qu8_vcvt_ukernel__avx512skx_x128() 59 vxGHIJ = _mm512_min_ps(vxGHIJ, voutput_max_less_zero_point); in xnn_f32_qu8_vcvt_ukernel__avx512skx_x128() 60 vxKLMN = _mm512_min_ps(vxKLMN, voutput_max_less_zero_point); in xnn_f32_qu8_vcvt_ukernel__avx512skx_x128() 61 vxOPQR = _mm512_min_ps(vxOPQR, voutput_max_less_zero_point); in xnn_f32_qu8_vcvt_ukernel__avx512skx_x128() 62 vxSTUV = _mm512_min_ps(vxSTUV, voutput_max_less_zero_point); in xnn_f32_qu8_vcvt_ukernel__avx512skx_x128() 99 vx0123 = _mm512_min_ps(vx0123, voutput_max_less_zero_point); in xnn_f32_qu8_vcvt_ukernel__avx512skx_x128() [all …]
|
D | vcvt-avx2-x64.c | 31 …const __m256 voutput_max_less_zero_point = _mm256_load_ps(params->avx2.output_max_less_zero_point); in xnn_f32_qu8_vcvt_ukernel__avx2_x64() local 56 vx01 = _mm256_min_ps(vx01, voutput_max_less_zero_point); in xnn_f32_qu8_vcvt_ukernel__avx2_x64() 57 vx23 = _mm256_min_ps(vx23, voutput_max_less_zero_point); in xnn_f32_qu8_vcvt_ukernel__avx2_x64() 58 vx45 = _mm256_min_ps(vx45, voutput_max_less_zero_point); in xnn_f32_qu8_vcvt_ukernel__avx2_x64() 59 vx67 = _mm256_min_ps(vx67, voutput_max_less_zero_point); in xnn_f32_qu8_vcvt_ukernel__avx2_x64() 60 vx89 = _mm256_min_ps(vx89, voutput_max_less_zero_point); in xnn_f32_qu8_vcvt_ukernel__avx2_x64() 61 vxAB = _mm256_min_ps(vxAB, voutput_max_less_zero_point); in xnn_f32_qu8_vcvt_ukernel__avx2_x64() 62 vxCD = _mm256_min_ps(vxCD, voutput_max_less_zero_point); in xnn_f32_qu8_vcvt_ukernel__avx2_x64() 63 vxEF = _mm256_min_ps(vxEF, voutput_max_less_zero_point); in xnn_f32_qu8_vcvt_ukernel__avx2_x64() 100 vx = _mm256_min_ps(vx, voutput_max_less_zero_point); in xnn_f32_qu8_vcvt_ukernel__avx2_x64() [all …]
|
/external/XNNPACK/src/qs8-gemm/gen/ |
D | 4x4-minmax-fp32-wasm-fmagic.c | 159 const float voutput_max_less_zero_point = params->fp32_scalar_fmagic.output_max_less_zero_point; in xnn_qs8_gemm_minmax_fp32_ukernel_4x4__wasm_fmagic() local 160 vfpacc0x0 = __builtin_wasm_min_f32(vfpacc0x0, voutput_max_less_zero_point); in xnn_qs8_gemm_minmax_fp32_ukernel_4x4__wasm_fmagic() 161 vfpacc0x1 = __builtin_wasm_min_f32(vfpacc0x1, voutput_max_less_zero_point); in xnn_qs8_gemm_minmax_fp32_ukernel_4x4__wasm_fmagic() 162 vfpacc0x2 = __builtin_wasm_min_f32(vfpacc0x2, voutput_max_less_zero_point); in xnn_qs8_gemm_minmax_fp32_ukernel_4x4__wasm_fmagic() 163 vfpacc0x3 = __builtin_wasm_min_f32(vfpacc0x3, voutput_max_less_zero_point); in xnn_qs8_gemm_minmax_fp32_ukernel_4x4__wasm_fmagic() 164 vfpacc1x0 = __builtin_wasm_min_f32(vfpacc1x0, voutput_max_less_zero_point); in xnn_qs8_gemm_minmax_fp32_ukernel_4x4__wasm_fmagic() 165 vfpacc1x1 = __builtin_wasm_min_f32(vfpacc1x1, voutput_max_less_zero_point); in xnn_qs8_gemm_minmax_fp32_ukernel_4x4__wasm_fmagic() 166 vfpacc1x2 = __builtin_wasm_min_f32(vfpacc1x2, voutput_max_less_zero_point); in xnn_qs8_gemm_minmax_fp32_ukernel_4x4__wasm_fmagic() 167 vfpacc1x3 = __builtin_wasm_min_f32(vfpacc1x3, voutput_max_less_zero_point); in xnn_qs8_gemm_minmax_fp32_ukernel_4x4__wasm_fmagic() 168 vfpacc2x0 = __builtin_wasm_min_f32(vfpacc2x0, voutput_max_less_zero_point); in xnn_qs8_gemm_minmax_fp32_ukernel_4x4__wasm_fmagic() [all …]
|
D | 4x4-minmax-fp32-scalar-fmagic.c | 159 const float voutput_max_less_zero_point = params->fp32_scalar_fmagic.output_max_less_zero_point; in xnn_qs8_gemm_minmax_fp32_ukernel_4x4__scalar_fmagic() local 160 vfpacc0x0 = math_min_f32(vfpacc0x0, voutput_max_less_zero_point); in xnn_qs8_gemm_minmax_fp32_ukernel_4x4__scalar_fmagic() 161 vfpacc0x1 = math_min_f32(vfpacc0x1, voutput_max_less_zero_point); in xnn_qs8_gemm_minmax_fp32_ukernel_4x4__scalar_fmagic() 162 vfpacc0x2 = math_min_f32(vfpacc0x2, voutput_max_less_zero_point); in xnn_qs8_gemm_minmax_fp32_ukernel_4x4__scalar_fmagic() 163 vfpacc0x3 = math_min_f32(vfpacc0x3, voutput_max_less_zero_point); in xnn_qs8_gemm_minmax_fp32_ukernel_4x4__scalar_fmagic() 164 vfpacc1x0 = math_min_f32(vfpacc1x0, voutput_max_less_zero_point); in xnn_qs8_gemm_minmax_fp32_ukernel_4x4__scalar_fmagic() 165 vfpacc1x1 = math_min_f32(vfpacc1x1, voutput_max_less_zero_point); in xnn_qs8_gemm_minmax_fp32_ukernel_4x4__scalar_fmagic() 166 vfpacc1x2 = math_min_f32(vfpacc1x2, voutput_max_less_zero_point); in xnn_qs8_gemm_minmax_fp32_ukernel_4x4__scalar_fmagic() 167 vfpacc1x3 = math_min_f32(vfpacc1x3, voutput_max_less_zero_point); in xnn_qs8_gemm_minmax_fp32_ukernel_4x4__scalar_fmagic() 168 vfpacc2x0 = math_min_f32(vfpacc2x0, voutput_max_less_zero_point); in xnn_qs8_gemm_minmax_fp32_ukernel_4x4__scalar_fmagic() [all …]
|
D | 4x4-minmax-fp32-scalar-lrintf.c | 160 const float voutput_max_less_zero_point = params->fp32_scalar_lrintf.output_max_less_zero_point; in xnn_qs8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf() local 161 vfpacc0x0 = math_min_f32(vfpacc0x0, voutput_max_less_zero_point); in xnn_qs8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf() 162 vfpacc0x1 = math_min_f32(vfpacc0x1, voutput_max_less_zero_point); in xnn_qs8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf() 163 vfpacc0x2 = math_min_f32(vfpacc0x2, voutput_max_less_zero_point); in xnn_qs8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf() 164 vfpacc0x3 = math_min_f32(vfpacc0x3, voutput_max_less_zero_point); in xnn_qs8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf() 165 vfpacc1x0 = math_min_f32(vfpacc1x0, voutput_max_less_zero_point); in xnn_qs8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf() 166 vfpacc1x1 = math_min_f32(vfpacc1x1, voutput_max_less_zero_point); in xnn_qs8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf() 167 vfpacc1x2 = math_min_f32(vfpacc1x2, voutput_max_less_zero_point); in xnn_qs8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf() 168 vfpacc1x3 = math_min_f32(vfpacc1x3, voutput_max_less_zero_point); in xnn_qs8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf() 169 vfpacc2x0 = math_min_f32(vfpacc2x0, voutput_max_less_zero_point); in xnn_qs8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf() [all …]
|
/external/XNNPACK/src/qc8-gemm/gen/ |
D | 4x4-minmax-fp32-wasm-fmagic.c | 163 const float voutput_max_less_zero_point = params->fp32_scalar_fmagic.output_max_less_zero_point; in xnn_qc8_gemm_minmax_fp32_ukernel_4x4__wasm_fmagic() local 164 vfpacc0x0 = __builtin_wasm_min_f32(vfpacc0x0, voutput_max_less_zero_point); in xnn_qc8_gemm_minmax_fp32_ukernel_4x4__wasm_fmagic() 165 vfpacc0x1 = __builtin_wasm_min_f32(vfpacc0x1, voutput_max_less_zero_point); in xnn_qc8_gemm_minmax_fp32_ukernel_4x4__wasm_fmagic() 166 vfpacc0x2 = __builtin_wasm_min_f32(vfpacc0x2, voutput_max_less_zero_point); in xnn_qc8_gemm_minmax_fp32_ukernel_4x4__wasm_fmagic() 167 vfpacc0x3 = __builtin_wasm_min_f32(vfpacc0x3, voutput_max_less_zero_point); in xnn_qc8_gemm_minmax_fp32_ukernel_4x4__wasm_fmagic() 168 vfpacc1x0 = __builtin_wasm_min_f32(vfpacc1x0, voutput_max_less_zero_point); in xnn_qc8_gemm_minmax_fp32_ukernel_4x4__wasm_fmagic() 169 vfpacc1x1 = __builtin_wasm_min_f32(vfpacc1x1, voutput_max_less_zero_point); in xnn_qc8_gemm_minmax_fp32_ukernel_4x4__wasm_fmagic() 170 vfpacc1x2 = __builtin_wasm_min_f32(vfpacc1x2, voutput_max_less_zero_point); in xnn_qc8_gemm_minmax_fp32_ukernel_4x4__wasm_fmagic() 171 vfpacc1x3 = __builtin_wasm_min_f32(vfpacc1x3, voutput_max_less_zero_point); in xnn_qc8_gemm_minmax_fp32_ukernel_4x4__wasm_fmagic() 172 vfpacc2x0 = __builtin_wasm_min_f32(vfpacc2x0, voutput_max_less_zero_point); in xnn_qc8_gemm_minmax_fp32_ukernel_4x4__wasm_fmagic() [all …]
|
D | 4x4-minmax-fp32-scalar-fmagic.c | 163 const float voutput_max_less_zero_point = params->fp32_scalar_fmagic.output_max_less_zero_point; in xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_fmagic() local 164 vfpacc0x0 = math_min_f32(vfpacc0x0, voutput_max_less_zero_point); in xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_fmagic() 165 vfpacc0x1 = math_min_f32(vfpacc0x1, voutput_max_less_zero_point); in xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_fmagic() 166 vfpacc0x2 = math_min_f32(vfpacc0x2, voutput_max_less_zero_point); in xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_fmagic() 167 vfpacc0x3 = math_min_f32(vfpacc0x3, voutput_max_less_zero_point); in xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_fmagic() 168 vfpacc1x0 = math_min_f32(vfpacc1x0, voutput_max_less_zero_point); in xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_fmagic() 169 vfpacc1x1 = math_min_f32(vfpacc1x1, voutput_max_less_zero_point); in xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_fmagic() 170 vfpacc1x2 = math_min_f32(vfpacc1x2, voutput_max_less_zero_point); in xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_fmagic() 171 vfpacc1x3 = math_min_f32(vfpacc1x3, voutput_max_less_zero_point); in xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_fmagic() 172 vfpacc2x0 = math_min_f32(vfpacc2x0, voutput_max_less_zero_point); in xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_fmagic() [all …]
|
D | 4x4-minmax-fp32-scalar-lrintf.c | 164 const float voutput_max_less_zero_point = params->fp32_scalar_lrintf.output_max_less_zero_point; in xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf() local 165 vfpacc0x0 = math_min_f32(vfpacc0x0, voutput_max_less_zero_point); in xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf() 166 vfpacc0x1 = math_min_f32(vfpacc0x1, voutput_max_less_zero_point); in xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf() 167 vfpacc0x2 = math_min_f32(vfpacc0x2, voutput_max_less_zero_point); in xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf() 168 vfpacc0x3 = math_min_f32(vfpacc0x3, voutput_max_less_zero_point); in xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf() 169 vfpacc1x0 = math_min_f32(vfpacc1x0, voutput_max_less_zero_point); in xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf() 170 vfpacc1x1 = math_min_f32(vfpacc1x1, voutput_max_less_zero_point); in xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf() 171 vfpacc1x2 = math_min_f32(vfpacc1x2, voutput_max_less_zero_point); in xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf() 172 vfpacc1x3 = math_min_f32(vfpacc1x3, voutput_max_less_zero_point); in xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf() 173 vfpacc2x0 = math_min_f32(vfpacc2x0, voutput_max_less_zero_point); in xnn_qc8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf() [all …]
|
/external/XNNPACK/src/qu8-igemm/gen/ |
D | 4x4-minmax-fp32-scalar-fmagic.c | 186 const float voutput_max_less_zero_point = params->fp32_scalar_fmagic.output_max_less_zero_point; in xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic() local 187 vfpacc0x0 = math_min_f32(vfpacc0x0, voutput_max_less_zero_point); in xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic() 188 vfpacc0x1 = math_min_f32(vfpacc0x1, voutput_max_less_zero_point); in xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic() 189 vfpacc0x2 = math_min_f32(vfpacc0x2, voutput_max_less_zero_point); in xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic() 190 vfpacc0x3 = math_min_f32(vfpacc0x3, voutput_max_less_zero_point); in xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic() 191 vfpacc1x0 = math_min_f32(vfpacc1x0, voutput_max_less_zero_point); in xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic() 192 vfpacc1x1 = math_min_f32(vfpacc1x1, voutput_max_less_zero_point); in xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic() 193 vfpacc1x2 = math_min_f32(vfpacc1x2, voutput_max_less_zero_point); in xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic() 194 vfpacc1x3 = math_min_f32(vfpacc1x3, voutput_max_less_zero_point); in xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic() 195 vfpacc2x0 = math_min_f32(vfpacc2x0, voutput_max_less_zero_point); in xnn_qu8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic() [all …]
|
D | 4x4-minmax-fp32-wasm-fmagic.c | 186 const float voutput_max_less_zero_point = params->fp32_scalar_fmagic.output_max_less_zero_point; in xnn_qu8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic() local 187 vfpacc0x0 = __builtin_wasm_min_f32(vfpacc0x0, voutput_max_less_zero_point); in xnn_qu8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic() 188 vfpacc0x1 = __builtin_wasm_min_f32(vfpacc0x1, voutput_max_less_zero_point); in xnn_qu8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic() 189 vfpacc0x2 = __builtin_wasm_min_f32(vfpacc0x2, voutput_max_less_zero_point); in xnn_qu8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic() 190 vfpacc0x3 = __builtin_wasm_min_f32(vfpacc0x3, voutput_max_less_zero_point); in xnn_qu8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic() 191 vfpacc1x0 = __builtin_wasm_min_f32(vfpacc1x0, voutput_max_less_zero_point); in xnn_qu8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic() 192 vfpacc1x1 = __builtin_wasm_min_f32(vfpacc1x1, voutput_max_less_zero_point); in xnn_qu8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic() 193 vfpacc1x2 = __builtin_wasm_min_f32(vfpacc1x2, voutput_max_less_zero_point); in xnn_qu8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic() 194 vfpacc1x3 = __builtin_wasm_min_f32(vfpacc1x3, voutput_max_less_zero_point); in xnn_qu8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic() 195 vfpacc2x0 = __builtin_wasm_min_f32(vfpacc2x0, voutput_max_less_zero_point); in xnn_qu8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic() [all …]
|
/external/XNNPACK/src/qu8-gemm/gen/ |
D | 4x4-minmax-fp32-scalar-fmagic.c | 160 const float voutput_max_less_zero_point = params->fp32_scalar_fmagic.output_max_less_zero_point; in xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_fmagic() local 161 vfpacc0x0 = math_min_f32(vfpacc0x0, voutput_max_less_zero_point); in xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_fmagic() 162 vfpacc0x1 = math_min_f32(vfpacc0x1, voutput_max_less_zero_point); in xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_fmagic() 163 vfpacc0x2 = math_min_f32(vfpacc0x2, voutput_max_less_zero_point); in xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_fmagic() 164 vfpacc0x3 = math_min_f32(vfpacc0x3, voutput_max_less_zero_point); in xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_fmagic() 165 vfpacc1x0 = math_min_f32(vfpacc1x0, voutput_max_less_zero_point); in xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_fmagic() 166 vfpacc1x1 = math_min_f32(vfpacc1x1, voutput_max_less_zero_point); in xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_fmagic() 167 vfpacc1x2 = math_min_f32(vfpacc1x2, voutput_max_less_zero_point); in xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_fmagic() 168 vfpacc1x3 = math_min_f32(vfpacc1x3, voutput_max_less_zero_point); in xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_fmagic() 169 vfpacc2x0 = math_min_f32(vfpacc2x0, voutput_max_less_zero_point); in xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_fmagic() [all …]
|
D | 4x4-minmax-fp32-scalar-lrintf.c | 161 const float voutput_max_less_zero_point = params->fp32_scalar_lrintf.output_max_less_zero_point; in xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf() local 162 vfpacc0x0 = math_min_f32(vfpacc0x0, voutput_max_less_zero_point); in xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf() 163 vfpacc0x1 = math_min_f32(vfpacc0x1, voutput_max_less_zero_point); in xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf() 164 vfpacc0x2 = math_min_f32(vfpacc0x2, voutput_max_less_zero_point); in xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf() 165 vfpacc0x3 = math_min_f32(vfpacc0x3, voutput_max_less_zero_point); in xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf() 166 vfpacc1x0 = math_min_f32(vfpacc1x0, voutput_max_less_zero_point); in xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf() 167 vfpacc1x1 = math_min_f32(vfpacc1x1, voutput_max_less_zero_point); in xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf() 168 vfpacc1x2 = math_min_f32(vfpacc1x2, voutput_max_less_zero_point); in xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf() 169 vfpacc1x3 = math_min_f32(vfpacc1x3, voutput_max_less_zero_point); in xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf() 170 vfpacc2x0 = math_min_f32(vfpacc2x0, voutput_max_less_zero_point); in xnn_qu8_gemm_minmax_fp32_ukernel_4x4__scalar_lrintf() [all …]
|
/external/XNNPACK/src/qs8-igemm/gen/ |
D | 4x4-minmax-fp32-wasm-fmagic.c | 185 const float voutput_max_less_zero_point = params->fp32_scalar_fmagic.output_max_less_zero_point; in xnn_qs8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic() local 186 vfpacc0x0 = __builtin_wasm_min_f32(vfpacc0x0, voutput_max_less_zero_point); in xnn_qs8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic() 187 vfpacc0x1 = __builtin_wasm_min_f32(vfpacc0x1, voutput_max_less_zero_point); in xnn_qs8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic() 188 vfpacc0x2 = __builtin_wasm_min_f32(vfpacc0x2, voutput_max_less_zero_point); in xnn_qs8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic() 189 vfpacc0x3 = __builtin_wasm_min_f32(vfpacc0x3, voutput_max_less_zero_point); in xnn_qs8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic() 190 vfpacc1x0 = __builtin_wasm_min_f32(vfpacc1x0, voutput_max_less_zero_point); in xnn_qs8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic() 191 vfpacc1x1 = __builtin_wasm_min_f32(vfpacc1x1, voutput_max_less_zero_point); in xnn_qs8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic() 192 vfpacc1x2 = __builtin_wasm_min_f32(vfpacc1x2, voutput_max_less_zero_point); in xnn_qs8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic() 193 vfpacc1x3 = __builtin_wasm_min_f32(vfpacc1x3, voutput_max_less_zero_point); in xnn_qs8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic() 194 vfpacc2x0 = __builtin_wasm_min_f32(vfpacc2x0, voutput_max_less_zero_point); in xnn_qs8_igemm_minmax_fp32_ukernel_4x4__wasm_fmagic() [all …]
|
D | 4x4-minmax-fp32-scalar-fmagic.c | 185 const float voutput_max_less_zero_point = params->fp32_scalar_fmagic.output_max_less_zero_point; in xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic() local 186 vfpacc0x0 = math_min_f32(vfpacc0x0, voutput_max_less_zero_point); in xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic() 187 vfpacc0x1 = math_min_f32(vfpacc0x1, voutput_max_less_zero_point); in xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic() 188 vfpacc0x2 = math_min_f32(vfpacc0x2, voutput_max_less_zero_point); in xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic() 189 vfpacc0x3 = math_min_f32(vfpacc0x3, voutput_max_less_zero_point); in xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic() 190 vfpacc1x0 = math_min_f32(vfpacc1x0, voutput_max_less_zero_point); in xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic() 191 vfpacc1x1 = math_min_f32(vfpacc1x1, voutput_max_less_zero_point); in xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic() 192 vfpacc1x2 = math_min_f32(vfpacc1x2, voutput_max_less_zero_point); in xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic() 193 vfpacc1x3 = math_min_f32(vfpacc1x3, voutput_max_less_zero_point); in xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic() 194 vfpacc2x0 = math_min_f32(vfpacc2x0, voutput_max_less_zero_point); in xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic() [all …]
|
D | 4x4-minmax-fp32-scalar-lrintf.c | 186 const float voutput_max_less_zero_point = params->fp32_scalar_lrintf.output_max_less_zero_point; in xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf() local 187 vfpacc0x0 = math_min_f32(vfpacc0x0, voutput_max_less_zero_point); in xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf() 188 vfpacc0x1 = math_min_f32(vfpacc0x1, voutput_max_less_zero_point); in xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf() 189 vfpacc0x2 = math_min_f32(vfpacc0x2, voutput_max_less_zero_point); in xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf() 190 vfpacc0x3 = math_min_f32(vfpacc0x3, voutput_max_less_zero_point); in xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf() 191 vfpacc1x0 = math_min_f32(vfpacc1x0, voutput_max_less_zero_point); in xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf() 192 vfpacc1x1 = math_min_f32(vfpacc1x1, voutput_max_less_zero_point); in xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf() 193 vfpacc1x2 = math_min_f32(vfpacc1x2, voutput_max_less_zero_point); in xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf() 194 vfpacc1x3 = math_min_f32(vfpacc1x3, voutput_max_less_zero_point); in xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf() 195 vfpacc2x0 = math_min_f32(vfpacc2x0, voutput_max_less_zero_point); in xnn_qs8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf() [all …]
|
/external/XNNPACK/src/qc8-igemm/gen/ |
D | 4x4-minmax-fp32-scalar-fmagic.c | 189 const float voutput_max_less_zero_point = params->fp32_scalar_fmagic.output_max_less_zero_point; in xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic() local 190 vfpacc0x0 = math_min_f32(vfpacc0x0, voutput_max_less_zero_point); in xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic() 191 vfpacc0x1 = math_min_f32(vfpacc0x1, voutput_max_less_zero_point); in xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic() 192 vfpacc0x2 = math_min_f32(vfpacc0x2, voutput_max_less_zero_point); in xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic() 193 vfpacc0x3 = math_min_f32(vfpacc0x3, voutput_max_less_zero_point); in xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic() 194 vfpacc1x0 = math_min_f32(vfpacc1x0, voutput_max_less_zero_point); in xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic() 195 vfpacc1x1 = math_min_f32(vfpacc1x1, voutput_max_less_zero_point); in xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic() 196 vfpacc1x2 = math_min_f32(vfpacc1x2, voutput_max_less_zero_point); in xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic() 197 vfpacc1x3 = math_min_f32(vfpacc1x3, voutput_max_less_zero_point); in xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic() 198 vfpacc2x0 = math_min_f32(vfpacc2x0, voutput_max_less_zero_point); in xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_fmagic() [all …]
|
D | 4x4-minmax-fp32-scalar-lrintf.c | 190 const float voutput_max_less_zero_point = params->fp32_scalar_lrintf.output_max_less_zero_point; in xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf() local 191 vfpacc0x0 = math_min_f32(vfpacc0x0, voutput_max_less_zero_point); in xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf() 192 vfpacc0x1 = math_min_f32(vfpacc0x1, voutput_max_less_zero_point); in xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf() 193 vfpacc0x2 = math_min_f32(vfpacc0x2, voutput_max_less_zero_point); in xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf() 194 vfpacc0x3 = math_min_f32(vfpacc0x3, voutput_max_less_zero_point); in xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf() 195 vfpacc1x0 = math_min_f32(vfpacc1x0, voutput_max_less_zero_point); in xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf() 196 vfpacc1x1 = math_min_f32(vfpacc1x1, voutput_max_less_zero_point); in xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf() 197 vfpacc1x2 = math_min_f32(vfpacc1x2, voutput_max_less_zero_point); in xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf() 198 vfpacc1x3 = math_min_f32(vfpacc1x3, voutput_max_less_zero_point); in xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf() 199 vfpacc2x0 = math_min_f32(vfpacc2x0, voutput_max_less_zero_point); in xnn_qc8_igemm_minmax_fp32_ukernel_4x4__scalar_lrintf() [all …]
|