/external/XNNPACK/src/f32-vbinary/gen/ |
D | vsubc-minmax-avx512f-x32.c | 42 __m512 vyGHIJKLMNOPQRSTUV = _mm512_sub_ps(vaGHIJKLMNOPQRSTUV, vb); in xnn_f32_vsubc_minmax_ukernel__avx512f_x32() local 46 vyGHIJKLMNOPQRSTUV = _mm512_max_ps(vyGHIJKLMNOPQRSTUV, vy_min); in xnn_f32_vsubc_minmax_ukernel__avx512f_x32() 49 vyGHIJKLMNOPQRSTUV = _mm512_min_ps(vyGHIJKLMNOPQRSTUV, vy_max); in xnn_f32_vsubc_minmax_ukernel__avx512f_x32() 52 _mm512_storeu_ps(y + 16, vyGHIJKLMNOPQRSTUV); in xnn_f32_vsubc_minmax_ukernel__avx512f_x32()
|
D | vrsubc-minmax-avx512f-x32.c | 42 __m512 vyGHIJKLMNOPQRSTUV = _mm512_sub_ps(vb, vaGHIJKLMNOPQRSTUV); in xnn_f32_vrsubc_minmax_ukernel__avx512f_x32() local 46 vyGHIJKLMNOPQRSTUV = _mm512_max_ps(vyGHIJKLMNOPQRSTUV, vy_min); in xnn_f32_vrsubc_minmax_ukernel__avx512f_x32() 49 vyGHIJKLMNOPQRSTUV = _mm512_min_ps(vyGHIJKLMNOPQRSTUV, vy_max); in xnn_f32_vrsubc_minmax_ukernel__avx512f_x32() 52 _mm512_storeu_ps(y + 16, vyGHIJKLMNOPQRSTUV); in xnn_f32_vrsubc_minmax_ukernel__avx512f_x32()
|
D | vaddc-minmax-avx512f-x32.c | 42 __m512 vyGHIJKLMNOPQRSTUV = _mm512_add_ps(vaGHIJKLMNOPQRSTUV, vb); in xnn_f32_vaddc_minmax_ukernel__avx512f_x32() local 46 vyGHIJKLMNOPQRSTUV = _mm512_max_ps(vyGHIJKLMNOPQRSTUV, vy_min); in xnn_f32_vaddc_minmax_ukernel__avx512f_x32() 49 vyGHIJKLMNOPQRSTUV = _mm512_min_ps(vyGHIJKLMNOPQRSTUV, vy_max); in xnn_f32_vaddc_minmax_ukernel__avx512f_x32() 52 _mm512_storeu_ps(y + 16, vyGHIJKLMNOPQRSTUV); in xnn_f32_vaddc_minmax_ukernel__avx512f_x32()
|
D | vdivc-minmax-avx512f-x32.c | 42 __m512 vyGHIJKLMNOPQRSTUV = _mm512_div_ps(vaGHIJKLMNOPQRSTUV, vb); in xnn_f32_vdivc_minmax_ukernel__avx512f_x32() local 46 vyGHIJKLMNOPQRSTUV = _mm512_max_ps(vyGHIJKLMNOPQRSTUV, vy_min); in xnn_f32_vdivc_minmax_ukernel__avx512f_x32() 49 vyGHIJKLMNOPQRSTUV = _mm512_min_ps(vyGHIJKLMNOPQRSTUV, vy_max); in xnn_f32_vdivc_minmax_ukernel__avx512f_x32() 52 _mm512_storeu_ps(y + 16, vyGHIJKLMNOPQRSTUV); in xnn_f32_vdivc_minmax_ukernel__avx512f_x32()
|
D | vrdivc-minmax-avx512f-x32.c | 42 __m512 vyGHIJKLMNOPQRSTUV = _mm512_div_ps(vb, vaGHIJKLMNOPQRSTUV); in xnn_f32_vrdivc_minmax_ukernel__avx512f_x32() local 46 vyGHIJKLMNOPQRSTUV = _mm512_max_ps(vyGHIJKLMNOPQRSTUV, vy_min); in xnn_f32_vrdivc_minmax_ukernel__avx512f_x32() 49 vyGHIJKLMNOPQRSTUV = _mm512_min_ps(vyGHIJKLMNOPQRSTUV, vy_max); in xnn_f32_vrdivc_minmax_ukernel__avx512f_x32() 52 _mm512_storeu_ps(y + 16, vyGHIJKLMNOPQRSTUV); in xnn_f32_vrdivc_minmax_ukernel__avx512f_x32()
|
D | vmulc-minmax-avx512f-x32.c | 42 __m512 vyGHIJKLMNOPQRSTUV = _mm512_mul_ps(vaGHIJKLMNOPQRSTUV, vb); in xnn_f32_vmulc_minmax_ukernel__avx512f_x32() local 46 vyGHIJKLMNOPQRSTUV = _mm512_max_ps(vyGHIJKLMNOPQRSTUV, vy_min); in xnn_f32_vmulc_minmax_ukernel__avx512f_x32() 49 vyGHIJKLMNOPQRSTUV = _mm512_min_ps(vyGHIJKLMNOPQRSTUV, vy_max); in xnn_f32_vmulc_minmax_ukernel__avx512f_x32() 52 _mm512_storeu_ps(y + 16, vyGHIJKLMNOPQRSTUV); in xnn_f32_vmulc_minmax_ukernel__avx512f_x32()
|
D | vdiv-minmax-avx512f-x32.c | 45 __m512 vyGHIJKLMNOPQRSTUV = _mm512_div_ps(vaGHIJKLMNOPQRSTUV, vbGHIJKLMNOPQRSTUV); in xnn_f32_vdiv_minmax_ukernel__avx512f_x32() local 49 vyGHIJKLMNOPQRSTUV = _mm512_max_ps(vyGHIJKLMNOPQRSTUV, vy_min); in xnn_f32_vdiv_minmax_ukernel__avx512f_x32() 52 vyGHIJKLMNOPQRSTUV = _mm512_min_ps(vyGHIJKLMNOPQRSTUV, vy_max); in xnn_f32_vdiv_minmax_ukernel__avx512f_x32() 55 _mm512_storeu_ps(y + 16, vyGHIJKLMNOPQRSTUV); in xnn_f32_vdiv_minmax_ukernel__avx512f_x32()
|
D | vmul-minmax-avx512f-x32.c | 45 __m512 vyGHIJKLMNOPQRSTUV = _mm512_mul_ps(vaGHIJKLMNOPQRSTUV, vbGHIJKLMNOPQRSTUV); in xnn_f32_vmul_minmax_ukernel__avx512f_x32() local 49 vyGHIJKLMNOPQRSTUV = _mm512_max_ps(vyGHIJKLMNOPQRSTUV, vy_min); in xnn_f32_vmul_minmax_ukernel__avx512f_x32() 52 vyGHIJKLMNOPQRSTUV = _mm512_min_ps(vyGHIJKLMNOPQRSTUV, vy_max); in xnn_f32_vmul_minmax_ukernel__avx512f_x32() 55 _mm512_storeu_ps(y + 16, vyGHIJKLMNOPQRSTUV); in xnn_f32_vmul_minmax_ukernel__avx512f_x32()
|
D | vadd-minmax-avx512f-x32.c | 45 __m512 vyGHIJKLMNOPQRSTUV = _mm512_add_ps(vaGHIJKLMNOPQRSTUV, vbGHIJKLMNOPQRSTUV); in xnn_f32_vadd_minmax_ukernel__avx512f_x32() local 49 vyGHIJKLMNOPQRSTUV = _mm512_max_ps(vyGHIJKLMNOPQRSTUV, vy_min); in xnn_f32_vadd_minmax_ukernel__avx512f_x32() 52 vyGHIJKLMNOPQRSTUV = _mm512_min_ps(vyGHIJKLMNOPQRSTUV, vy_max); in xnn_f32_vadd_minmax_ukernel__avx512f_x32() 55 _mm512_storeu_ps(y + 16, vyGHIJKLMNOPQRSTUV); in xnn_f32_vadd_minmax_ukernel__avx512f_x32()
|
D | vsub-minmax-avx512f-x32.c | 45 __m512 vyGHIJKLMNOPQRSTUV = _mm512_sub_ps(vaGHIJKLMNOPQRSTUV, vbGHIJKLMNOPQRSTUV); in xnn_f32_vsub_minmax_ukernel__avx512f_x32() local 49 vyGHIJKLMNOPQRSTUV = _mm512_max_ps(vyGHIJKLMNOPQRSTUV, vy_min); in xnn_f32_vsub_minmax_ukernel__avx512f_x32() 52 vyGHIJKLMNOPQRSTUV = _mm512_min_ps(vyGHIJKLMNOPQRSTUV, vy_max); in xnn_f32_vsub_minmax_ukernel__avx512f_x32() 55 _mm512_storeu_ps(y + 16, vyGHIJKLMNOPQRSTUV); in xnn_f32_vsub_minmax_ukernel__avx512f_x32()
|
D | vsqrdiffc-avx512f-x32.c | 40 __m512 vyGHIJKLMNOPQRSTUV = _mm512_sub_ps(vaGHIJKLMNOPQRSTUV, vb); in xnn_f32_vsqrdiffc_ukernel__avx512f_x32() local 43 vyGHIJKLMNOPQRSTUV = _mm512_mul_ps(vyGHIJKLMNOPQRSTUV, vyGHIJKLMNOPQRSTUV); in xnn_f32_vsqrdiffc_ukernel__avx512f_x32() 47 _mm512_storeu_ps(y + 16, vyGHIJKLMNOPQRSTUV); in xnn_f32_vsqrdiffc_ukernel__avx512f_x32()
|
D | vsqrdiff-avx512f-x32.c | 43 __m512 vyGHIJKLMNOPQRSTUV = _mm512_sub_ps(vaGHIJKLMNOPQRSTUV, vbGHIJKLMNOPQRSTUV); in xnn_f32_vsqrdiff_ukernel__avx512f_x32() local 46 vyGHIJKLMNOPQRSTUV = _mm512_mul_ps(vyGHIJKLMNOPQRSTUV, vyGHIJKLMNOPQRSTUV); in xnn_f32_vsqrdiff_ukernel__avx512f_x32() 50 _mm512_storeu_ps(y + 16, vyGHIJKLMNOPQRSTUV); in xnn_f32_vsqrdiff_ukernel__avx512f_x32()
|
/external/XNNPACK/src/amalgam/ |
D | avx512f.c | 1462 __m512 vyGHIJKLMNOPQRSTUV = _mm512_add_ps(vaGHIJKLMNOPQRSTUV, vbGHIJKLMNOPQRSTUV); in xnn_f32_vadd_minmax_ukernel__avx512f_x32() local 1466 vyGHIJKLMNOPQRSTUV = _mm512_max_ps(vyGHIJKLMNOPQRSTUV, vy_min); in xnn_f32_vadd_minmax_ukernel__avx512f_x32() 1469 vyGHIJKLMNOPQRSTUV = _mm512_min_ps(vyGHIJKLMNOPQRSTUV, vy_max); in xnn_f32_vadd_minmax_ukernel__avx512f_x32() 1472 _mm512_storeu_ps(y + 16, vyGHIJKLMNOPQRSTUV); in xnn_f32_vadd_minmax_ukernel__avx512f_x32() 1528 __m512 vyGHIJKLMNOPQRSTUV = _mm512_add_ps(vaGHIJKLMNOPQRSTUV, vb); in xnn_f32_vaddc_minmax_ukernel__avx512f_x32() local 1532 vyGHIJKLMNOPQRSTUV = _mm512_max_ps(vyGHIJKLMNOPQRSTUV, vy_min); in xnn_f32_vaddc_minmax_ukernel__avx512f_x32() 1535 vyGHIJKLMNOPQRSTUV = _mm512_min_ps(vyGHIJKLMNOPQRSTUV, vy_max); in xnn_f32_vaddc_minmax_ukernel__avx512f_x32() 1538 _mm512_storeu_ps(y + 16, vyGHIJKLMNOPQRSTUV); in xnn_f32_vaddc_minmax_ukernel__avx512f_x32() 1593 __m512 vyGHIJKLMNOPQRSTUV = _mm512_div_ps(vaGHIJKLMNOPQRSTUV, vbGHIJKLMNOPQRSTUV); in xnn_f32_vdiv_minmax_ukernel__avx512f_x32() local 1597 vyGHIJKLMNOPQRSTUV = _mm512_max_ps(vyGHIJKLMNOPQRSTUV, vy_min); in xnn_f32_vdiv_minmax_ukernel__avx512f_x32() [all …]
|
/external/XNNPACK/src/f32-qs8-vcvt/gen/ |
D | vcvt-neonv8-x32.c | 73 int8x16_t vyGHIJKLMNOPQRSTUV = vcombine_s8(vqmovn_s16(vaccGHIJKLMN), vqmovn_s16(vaccOPQRSTUV)); in xnn_f32_qs8_vcvt_ukernel__neonv8_x32() local 76 vyGHIJKLMNOPQRSTUV = vmaxq_s8(vyGHIJKLMNOPQRSTUV, voutput_min); in xnn_f32_qs8_vcvt_ukernel__neonv8_x32() 79 vyGHIJKLMNOPQRSTUV = vminq_s8(vyGHIJKLMNOPQRSTUV, voutput_max); in xnn_f32_qs8_vcvt_ukernel__neonv8_x32() 82 vst1q_s8(y, vyGHIJKLMNOPQRSTUV); y += 16; in xnn_f32_qs8_vcvt_ukernel__neonv8_x32()
|
D | vcvt-wasmsimd-cvt-x32.c | 83 v128_t vyGHIJKLMNOPQRSTUV = wasm_i8x16_narrow_i16x8(vaccGHIJKLMN, vaccOPQRSTUV); in xnn_f32_qs8_vcvt_ukernel__wasmsimd_cvt_x32() local 86 vyGHIJKLMNOPQRSTUV = wasm_i8x16_max(vyGHIJKLMNOPQRSTUV, voutput_min); in xnn_f32_qs8_vcvt_ukernel__wasmsimd_cvt_x32() 89 vyGHIJKLMNOPQRSTUV = wasm_i8x16_min(vyGHIJKLMNOPQRSTUV, voutput_max); in xnn_f32_qs8_vcvt_ukernel__wasmsimd_cvt_x32() 92 wasm_v128_store(y + 16, vyGHIJKLMNOPQRSTUV); in xnn_f32_qs8_vcvt_ukernel__wasmsimd_cvt_x32()
|
D | vcvt-neon-x32.c | 78 int8x16_t vyGHIJKLMNOPQRSTUV = vcombine_s8(vqmovn_s16(vaccGHIJKLMN), vqmovn_s16(vaccOPQRSTUV)); in xnn_f32_qs8_vcvt_ukernel__neon_x32() local 81 vyGHIJKLMNOPQRSTUV = vmaxq_s8(vyGHIJKLMNOPQRSTUV, voutput_min); in xnn_f32_qs8_vcvt_ukernel__neon_x32() 84 vyGHIJKLMNOPQRSTUV = vminq_s8(vyGHIJKLMNOPQRSTUV, voutput_max); in xnn_f32_qs8_vcvt_ukernel__neon_x32() 87 vst1q_s8(y, vyGHIJKLMNOPQRSTUV); y += 16; in xnn_f32_qs8_vcvt_ukernel__neon_x32()
|
/external/XNNPACK/src/f32-qu8-vcvt/gen/ |
D | vcvt-neonv8-x32.c | 73 … uint8x16_t vyGHIJKLMNOPQRSTUV = vcombine_u8(vqmovun_s16(vaccGHIJKLMN), vqmovun_s16(vaccOPQRSTUV)); in xnn_f32_qu8_vcvt_ukernel__neonv8_x32() local 76 vyGHIJKLMNOPQRSTUV = vmaxq_u8(vyGHIJKLMNOPQRSTUV, voutput_min); in xnn_f32_qu8_vcvt_ukernel__neonv8_x32() 79 vyGHIJKLMNOPQRSTUV = vminq_u8(vyGHIJKLMNOPQRSTUV, voutput_max); in xnn_f32_qu8_vcvt_ukernel__neonv8_x32() 82 vst1q_u8(y, vyGHIJKLMNOPQRSTUV); y += 16; in xnn_f32_qu8_vcvt_ukernel__neonv8_x32()
|
D | vcvt-wasmsimd-cvt-x32.c | 83 v128_t vyGHIJKLMNOPQRSTUV = wasm_u8x16_narrow_i16x8(vaccGHIJKLMN, vaccOPQRSTUV); in xnn_f32_qu8_vcvt_ukernel__wasmsimd_cvt_x32() local 86 vyGHIJKLMNOPQRSTUV = wasm_u8x16_max(vyGHIJKLMNOPQRSTUV, voutput_min); in xnn_f32_qu8_vcvt_ukernel__wasmsimd_cvt_x32() 89 vyGHIJKLMNOPQRSTUV = wasm_u8x16_min(vyGHIJKLMNOPQRSTUV, voutput_max); in xnn_f32_qu8_vcvt_ukernel__wasmsimd_cvt_x32() 92 wasm_v128_store(y + 16, vyGHIJKLMNOPQRSTUV); in xnn_f32_qu8_vcvt_ukernel__wasmsimd_cvt_x32()
|
D | vcvt-neon-x32.c | 78 … uint8x16_t vyGHIJKLMNOPQRSTUV = vcombine_u8(vqmovun_s16(vaccGHIJKLMN), vqmovun_s16(vaccOPQRSTUV)); in xnn_f32_qu8_vcvt_ukernel__neon_x32() local 81 vyGHIJKLMNOPQRSTUV = vmaxq_u8(vyGHIJKLMNOPQRSTUV, voutput_min); in xnn_f32_qu8_vcvt_ukernel__neon_x32() 84 vyGHIJKLMNOPQRSTUV = vminq_u8(vyGHIJKLMNOPQRSTUV, voutput_max); in xnn_f32_qu8_vcvt_ukernel__neon_x32() 87 vst1q_u8(y, vyGHIJKLMNOPQRSTUV); y += 16; in xnn_f32_qu8_vcvt_ukernel__neon_x32()
|
/external/XNNPACK/src/qu8-f32-vcvt/gen/ |
D | vcvt-avx512skx-x32.c | 41 __m512 vyGHIJKLMNOPQRSTUV = _mm512_cvtepi32_ps(vxGHIJKLMNOPQRSTUV); in xnn_qu8_f32_vcvt_ukernel__avx512skx_x32() local 44 vyGHIJKLMNOPQRSTUV = _mm512_mul_ps(vyGHIJKLMNOPQRSTUV, vscale); in xnn_qu8_f32_vcvt_ukernel__avx512skx_x32() 47 _mm512_storeu_ps(y + 16, vyGHIJKLMNOPQRSTUV); in xnn_qu8_f32_vcvt_ukernel__avx512skx_x32()
|
D | vcvt-avx512skx-x48.c | 43 __m512 vyGHIJKLMNOPQRSTUV = _mm512_cvtepi32_ps(vxGHIJKLMNOPQRSTUV); in xnn_qu8_f32_vcvt_ukernel__avx512skx_x48() local 47 vyGHIJKLMNOPQRSTUV = _mm512_mul_ps(vyGHIJKLMNOPQRSTUV, vscale); in xnn_qu8_f32_vcvt_ukernel__avx512skx_x48() 51 _mm512_storeu_ps(y + 16, vyGHIJKLMNOPQRSTUV); in xnn_qu8_f32_vcvt_ukernel__avx512skx_x48()
|
D | vcvt-avx512skx-x64.c | 45 __m512 vyGHIJKLMNOPQRSTUV = _mm512_cvtepi32_ps(vxGHIJKLMNOPQRSTUV); in xnn_qu8_f32_vcvt_ukernel__avx512skx_x64() local 50 vyGHIJKLMNOPQRSTUV = _mm512_mul_ps(vyGHIJKLMNOPQRSTUV, vscale); in xnn_qu8_f32_vcvt_ukernel__avx512skx_x64() 55 _mm512_storeu_ps(y + 16, vyGHIJKLMNOPQRSTUV); in xnn_qu8_f32_vcvt_ukernel__avx512skx_x64()
|
/external/XNNPACK/src/qs8-f32-vcvt/gen/ |
D | vcvt-avx512skx-x32.c | 41 __m512 vyGHIJKLMNOPQRSTUV = _mm512_cvtepi32_ps(vxGHIJKLMNOPQRSTUV); in xnn_qs8_f32_vcvt_ukernel__avx512skx_x32() local 44 vyGHIJKLMNOPQRSTUV = _mm512_mul_ps(vyGHIJKLMNOPQRSTUV, vscale); in xnn_qs8_f32_vcvt_ukernel__avx512skx_x32() 47 _mm512_storeu_ps(y + 16, vyGHIJKLMNOPQRSTUV); in xnn_qs8_f32_vcvt_ukernel__avx512skx_x32()
|
D | vcvt-avx512skx-x48.c | 43 __m512 vyGHIJKLMNOPQRSTUV = _mm512_cvtepi32_ps(vxGHIJKLMNOPQRSTUV); in xnn_qs8_f32_vcvt_ukernel__avx512skx_x48() local 47 vyGHIJKLMNOPQRSTUV = _mm512_mul_ps(vyGHIJKLMNOPQRSTUV, vscale); in xnn_qs8_f32_vcvt_ukernel__avx512skx_x48() 51 _mm512_storeu_ps(y + 16, vyGHIJKLMNOPQRSTUV); in xnn_qs8_f32_vcvt_ukernel__avx512skx_x48()
|
D | vcvt-avx512skx-x64.c | 45 __m512 vyGHIJKLMNOPQRSTUV = _mm512_cvtepi32_ps(vxGHIJKLMNOPQRSTUV); in xnn_qs8_f32_vcvt_ukernel__avx512skx_x64() local 50 vyGHIJKLMNOPQRSTUV = _mm512_mul_ps(vyGHIJKLMNOPQRSTUV, vscale); in xnn_qs8_f32_vcvt_ukernel__avx512skx_x64() 55 _mm512_storeu_ps(y + 16, vyGHIJKLMNOPQRSTUV); in xnn_qs8_f32_vcvt_ukernel__avx512skx_x64()
|